by Guest » 24 Dec 2024, 11:08
I am using keras==3.7.0 and trying to implement a custom TCAN (Temporal Convolutional Attention Network) block. The attention layer works in a standalone test case, but the integration into my custom model fails. Specifically, the error occurs when I try to unpack the output of the Attention layer.
The following works:
[code]
import tensorflow as tf
from tensorflow.keras.layers import Attention, Input
# Example inputs
batch_size, time_steps, features = 2, 8, 16
query = tf.random.uniform((batch_size, time_steps, features))
value = tf.random.uniform((batch_size, time_steps, features))
key = tf.random.uniform((batch_size, time_steps, features))
# Attention layer with return_attention_scores=True
attention_layer = Attention(use_scale=True, dropout=0.1)
output, attention_scores = attention_layer(
    [query, value, key], return_attention_scores=True, use_causal_mask=True
)
print(f"Output shape: {output.shape}")
print(f"Attention scores shape: {attention_scores.shape}")
Ergibt:
Code: Select all
Output shape: (2, 8, 16)
Attention scores shape: (2, 8, 8)
[/code]
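If I see it correctly, the standalone test only works because query, value and key are eager tensors there. Inside my model the same call receives symbolic KerasTensors (coming from keras.Input and the Dense projections). A minimal sketch of that symbolic variant, which I assume runs into the same problem (the shapes are just the ones from the test above):
[code]
from tensorflow.keras.layers import Attention, Input

# Symbolic inputs, as produced during functional model construction
query = Input(shape=(8, 16))
value = Input(shape=(8, 16))
key = Input(shape=(8, 16))

attention_layer = Attention(use_scale=True, dropout=0.1)
# With KerasTensors this call is only traced, not executed eagerly;
# unpacking the result is the step that fails in my model code.
output, attention_scores = attention_layer(
    [query, value, key], return_attention_scores=True, use_causal_mask=True
)
[/code]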
Why doesn't it work in my main code?
[code]
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Add, Attention, Conv1D, Dense, Lambda


def tcan_block(inputs, filters, kernel_size, activation, dilation_rate, d_k, atn_dropout):
    """
    A single block of TCAN.
    Arguments:
        inputs: Tensor, input sequence.
        filters: Integer, number of filters for the convolution.
        kernel_size: Integer, size of the convolution kernel.
        activation: String or callable, activation for the convolution.
        dilation_rate: Integer, dilation rate for the convolution.
        d_k: Integer, dimensionality of the attention keys/queries.
        atn_dropout: Float, dropout rate applied to the attention scores.
    Returns:
        Tensor, output of the TCAN block.
    """
    # Temporal Attention
    query = Dense(d_k)(inputs)
    key = Dense(d_k)(inputs)
    value = Dense(d_k)(inputs)
    # Apply Keras Attention with causal masking
    attention_output, attention_scores = Attention(use_scale=True, dropout=atn_dropout)(
        [query, value, key],
        use_causal_mask=True,
        return_attention_scores=True,
    )
    # Dilated Convolution
    conv_output = Conv1D(
        filters, kernel_size, dilation_rate=dilation_rate, padding="causal", activation=activation
    )(attention_output)
    # Enhanced Residual
    # Calculate Mt using cumulative sum up to each time step
    importance = Lambda(lambda x: K.cumsum(x, axis=1))(attention_scores)
    enhanced_residual = Lambda(lambda x: x[0] * x[1])([inputs, importance])
    # Add residual connection
    output = Add()([inputs, conv_output, enhanced_residual])
    return output
[/code]
Error:
[code]
File "/home/furkan/Documents/Deep-Learning-Model/src/utils/models/tcan.py", line 138, in build_tcan_model
x = tcan_block(
^^^^^^^^^^^
File "/home/furkan/Documents/Deep-Learning-Model/src/utils/models/tcan.py", line 87, in tcan_block
attention_output, attention_scores = Attention(use_scale=True, dropout=atn_dropout)(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/furkan/Documents/Deep-Learning-Model/.venv/lib/python3.11/site-packages/keras/src/backend/common/keras_tensor.py", line 167, in __iter__
raise NotImplementedError(
NotImplementedError: Iterating over a symbolic KerasTensor is not supported.
[/code]
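The only workaround I have come up with so far is to move the unpacking out of the functional graph construction and into a small custom layer, so that the tuple from keras.layers.Attention is only ever unpacked inside call(). This is just a sketch of the idea, not verified against keras==3.7.0, and the class name is my own:
[code]
from tensorflow.keras import layers


class CausalAttentionWithScores(layers.Layer):
    """Wraps keras.layers.Attention so that the (output, scores) pair is
    unpacked inside call() rather than from a symbolic KerasTensor during
    functional model construction."""

    def __init__(self, dropout=0.0, **kwargs):
        super().__init__(**kwargs)
        self.attn = layers.Attention(use_scale=True, dropout=dropout)

    def call(self, query, value, key, training=None):
        # Inside call() the tensors are concrete (or traced), so the tuple
        # returned by Attention can be unpacked as in the eager test above.
        output, scores = self.attn(
            [query, value, key],
            return_attention_scores=True,
            use_causal_mask=True,
            training=training,
        )
        return output, scores
[/code]
In tcan_block the attention call would then become attention_output, attention_scores = CausalAttentionWithScores(dropout=atn_dropout)(query, value, key). Is something like this really necessary, or is there a supported way to use return_attention_scores=True directly in the functional API?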