import tensorflow as tf

def attention(inputs, attention_size):
    """Pool a rank-3 tensor along one axis using learned softmax weights.

    The first two axes of ``inputs`` are swapped, every feature vector is
    projected through a tanh layer of width ``attention_size`` and scored
    against a learned vector; the scores are softmax-normalised and used to
    form a weighted sum of the feature vectors.

    Each call creates three new ``tf.Variable`` objects (projection matrix,
    projection bias and scoring vector), all initialised from a normal
    distribution with standard deviation 0.1.

    Args:
        inputs: Rank-3 tensor of shape ``(d0, d1, d2)``. The size of the
            last axis must be statically known, because it determines the
            shape of the projection matrix.
            NOTE(review): the unconditional axis swap suggests callers pass
            time-major data ``(time, batch, hidden)`` -- confirm.
        attention_size: Width of the intermediate projection.

    Returns:
        A pair ``(output, alphas)``: ``output`` has shape ``(d1, d2)`` and is
        the weighted sum over the original first axis; ``alphas`` has shape
        ``(d1, d0)`` and holds the softmax weights (each row sums to 1).
    """

    def _normal_variable(shape):
        # Trainable parameter drawn from N(0, 0.1**2).
        return tf.Variable(tf.random_normal(shape, stddev=0.1))

    # Swap the two leading axes: (d0, d1, d2) -> (d1, d0, d2).
    swapped = tf.transpose(inputs, perm=[1, 0, 2])
    feature_dim = swapped.shape[2].value

    # Parameters are created in a fixed order (matrix, bias, scoring vector)
    # so their auto-generated graph names stay stable.
    projection = _normal_variable([feature_dim, attention_size])
    bias = _normal_variable([attention_size])
    scorer = _normal_variable([attention_size])

    # Hidden representation: (d1, d0, attention_size).
    hidden = tf.tanh(tf.tensordot(swapped, projection, axes=1) + bias)
    # One scalar score per feature vector: (d1, d0).
    scores = tf.tensordot(hidden, scorer, axes=1)
    # Softmax runs over the last axis, i.e. the original first axis.
    weights = tf.nn.softmax(scores)

    # Broadcast the weights over the feature axis, then sum out axis 1.
    weighted = swapped * tf.expand_dims(weights, axis=-1)
    pooled = tf.reduce_sum(weighted, axis=1)
    return pooled, weights
