Scientific Functions in NumPy and SciPy

import datetime

import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import numba


def tSNE(X, ndims=2, perplexity=30, seed=0, max_iter=500, stop_lying_iter=100, mom_switch_iter=400):
    """The t-SNE algorithm

    Args:
        X: the high-dimensional coordinates
        ndims: number of dimensions in output space
    Returns:
        Points of X in low dimension
    """
    momentum = 0.5
    final_momentum = 0.8
    eta = 200.0
    N, _D = X.shape
    np.random.seed(seed)

    # normalize input
    X -= X.mean(axis=0)   # zero mean
    X /= np.abs(X).max()  # min-max scaled

    # compute input similarity for exact t-SNE
    P = computeGaussianPerplexity(X, perplexity)
    # symmetrize and normalize input similarities
    P = P + P.T
    P /= P.sum()
    # lie about the P-values
    P *= 12.0
    # initialize solution
    Y = np.random.randn(N, ndims) * 0.0001
    # perform main training loop
    gains = np.ones_like(Y)
    uY = np.zeros_like(Y)
    for i in range(max_iter):
        # compute gradient, update gains
        dY = computeExactGradient(P, Y)
        gains = np.where(np.sign(dY) != np.sign(uY), gains+0.2, gains*0.8).clip(0.1)
        # gradient update with momentum and gains
        uY = momentum * uY - eta * gains * dY
        Y = Y + uY
        # make the solution zero-mean
        Y -= Y.mean(axis=0)
        # Stop lying about the P-values after a while, and switch momentum
        if i == stop_lying_iter:
            P /= 12.0
        if i == mom_switch_iter:
            momentum = final_momentum
        # print progress
        if (i % 50) == 0:
            C = evaluateError(P, Y)
            now = datetime.datetime.now()
            print(f"{now} - Iteration {i}: Error = {C}")
    return Y

 

@numba.jit(nopython=True)
def computeExactGradient(P, Y):
    """Gradient of t-SNE cost function

    Args:
        P: similarity matrix
        Y: low-dimensional coordinates
    Returns:
        dY, a numpy array of shape (N,D)
    """
    N, _D = Y.shape
    # compute squared Euclidean distance matrix of Y, the Q matrix, and the normalization sum
    DD = computeSquaredEuclideanDistance(Y)
    Q = 1/(1+DD)
    sum_Q = Q.sum()
    # compute gradient
    mult = (P - (Q/sum_Q)) * Q
    dY = np.zeros_like(Y)
    for n in range(N):
        for m in range(N):
            if n == m:
                continue
            dY[n] += (Y[n] - Y[m]) * mult[n, m]
    return dY

 

@numba.jit(nopython=True)
def evaluateError(P, Y):
    """Evaluate t-SNE cost function

    Args:
        P: similarity matrix
        Y: low-dimensional coordinates
    Returns:
        Total t-SNE error C
    """
    DD = computeSquaredEuclideanDistance(Y)
    # Compute Q-matrix and normalization sum
    Q = 1/(1+DD)
    np.fill_diagonal(Q, np.finfo(np.float32).eps)
    Q /= Q.sum()
    # Sum t-SNE error: sum P log(P/Q)
    error = P * np.log((P + np.finfo(np.float32).eps) / (Q + np.finfo(np.float32).eps))
    return error.sum()

 

@numba.jit(nopython=True)
def computeGaussianPerplexity(X, perplexity):
    """Compute Gaussian Perplexity

    Args:
        X: numpy array of shape (N,D)
        perplexity: double
    Returns:
        Similarity matrix P
    """
    # Compute the squared Euclidean distance matrix
    N, _D = X.shape
    DD = computeSquaredEuclideanDistance(X)
    # Compute the Gaussian kernel row by row
    P = np.zeros_like(DD)
    for n in range(N):
        found = False
        beta = 1.0
        min_beta = -np.inf
        max_beta = np.inf
        tol = 1e-5

        # iterate until we get a good perplexity
        n_iter = 0
        while not found and n_iter < 200:
            # compute Gaussian kernel row
            P[n] = np.exp(-beta * DD[n])
            P[n, n] = np.finfo(np.float32).eps
            # compute entropy of the current row
            # Gaussians need to be row-normalized to make it a probability,
            # then H = sum_i -P[i] log(P[i])
            #        = sum_i -P[i] (-beta * DD[n] - log(sum_P))
            #        = sum_i P[i] * beta * DD[n] + log(sum_P)
            sum_P = P[n].sum()
            H = beta * (DD[n] @ P[n]) / sum_P + np.log(sum_P)
            # Check whether the entropy is within the tolerance level
            Hdiff = H - np.log2(perplexity)
            if -tol < Hdiff < tol:
                found = True
                break
            if Hdiff > 0:
                min_beta = beta
                if max_beta in (np.inf, -np.inf):
                    beta *= 2
                else:
                    beta = (beta + max_beta) / 2
            else:
                max_beta = beta
                if min_beta in (np.inf, -np.inf):
                    beta /= 2
                else:
                    beta = (beta + min_beta) / 2
            n_iter += 1
        # normalize this row
        P[n] /= P[n].sum()
    assert not np.isnan(P).any()
    return P

 

@numba.jit(nopython=True)
def computeSquaredEuclideanDistance(X):
    """Compute squared distance

    Args:
        X: numpy array of shape (N,D)
    Returns:
        numpy array of shape (N,N) of squared distances
    """
    N, _D = X.shape
    DD = np.zeros((N, N))
    for i in range(N-1):
        for j in range(i+1, N):
            diff = X[i] - X[j]
            DD[j][i] = DD[i][j] = diff @ diff
    return DD

 

(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
# pick 1000 samples from the dataset
rows = np.random.choice(X_test.shape[0], 1000, replace=False)
X_data = X_train[rows].reshape(1000, -1).astype("float")
X_label = y_train[rows]
# run t-SNE to transform into 2D and visualize in a scatter plot
Y = tSNE(X_data, 2, 30, 0, 500, 100, 400)
plt.figure(figsize=(8,8))
plt.scatter(Y[:,0], Y[:,1], c=X_label)
plt.show()
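
As an aside, the pairwise squared-distance helper is written in plain NumPy loops so that Numba can compile it. Outside of a @numba.jit function, SciPy can produce the same matrix in one call. The snippet below is a minimal sketch, not part of the original listing, using scipy.spatial.distance.cdist on stand-in data of the same shape as X_data:

import numpy as np
from scipy.spatial.distance import cdist

X = np.random.randn(1000, 784)   # stand-in data shaped like X_data above
DD = cdist(X, X, "sqeuclidean")  # (N,N) matrix of squared Euclidean distances
# DD should match computeSquaredEuclideanDistance(X) up to floating-point error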


