Anonymous
Repetitive word output from a text summarizer
Post
by Anonymous » 05 Mar 2025, 06:04
I have been working on building a text summarizer, but when I try to run my code the output is wrong. The code is given below.
Code:
from google.colab import drive
drive.mount('/content/drive')
import pandas as pd
import numpy as np
import tensorflow as tf
import time
import re
import pickle
# Load data
data_path = '/content/drive/MyDrive/train.csv' # Replace with the actual path to your data
data = pd.read_csv(data_path)
data.head()
data = data.rename(columns={'article': 'article', 'highlights': 'summary'}) # Rename columns
data = data.drop('id', axis=1) # Drop 'id' column
document = data['article']
summary = data['summary']
data.head()
# Add start/end tokens for the decoder sequence.
# Note: the angle-bracket special tokens were stripped when the post was rendered;
# '<go>', '<stop>' and '<unk>' are assumed placeholders and must be used consistently below.
summary = summary.apply(lambda x: '<go> ' + x + ' <stop>')
# Tokenizer setup ('<' and '>' are deliberately left out of the filters so the special tokens survive)
filters = '!"#$%&()*+,-./:;=?@[\\]^_`{|}~\t\n'
oov_token = '<unk>'
document_tokenizer = tf.keras.preprocessing.text.Tokenizer(oov_token=oov_token)
summary_tokenizer = tf.keras.preprocessing.text.Tokenizer(filters=filters, oov_token=oov_token)
document_tokenizer.fit_on_texts(document)
summary_tokenizer.fit_on_texts(summary)
inputs = document_tokenizer.texts_to_sequences(document)
targets = summary_tokenizer.texts_to_sequences(summary)
# Get vocab sizes
encoder_vocab_size = len(document_tokenizer.word_index) + 1
decoder_vocab_size = len(summary_tokenizer.word_index) + 1
# Sequence padding
encoder_maxlen = 256
decoder_maxlen = 50
inputs = tf.keras.preprocessing.sequence.pad_sequences(inputs, maxlen=encoder_maxlen, padding='post', truncating='post')
targets = tf.keras.preprocessing.sequence.pad_sequences(targets, maxlen=decoder_maxlen, padding='post', truncating='post')
inputs = tf.cast(inputs, dtype=tf.int32)
targets = tf.cast(targets, dtype=tf.int32)
# Dataset setup
BUFFER_SIZE = 20000
BATCH_SIZE = 8
dataset = tf.data.Dataset.from_tensor_slices((inputs, targets)).shuffle(BUFFER_SIZE).batch(BATCH_SIZE)
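# Each element of `dataset` is a (inputs, targets) pair with shapes
# (BATCH_SIZE, encoder_maxlen) and (BATCH_SIZE, decoder_maxlen) — (8, 256) and (8, 50) here
# (the last batch may be smaller).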
# Positional Encoding
def get_angles(position, i, d_model):
    angle_rates = 1 / np.power(10000, (2 * (i // 2)) / np.float32(d_model))
    return position * angle_rates

def positional_encoding(position, d_model):
    angle_rads = get_angles(np.arange(position)[:, np.newaxis], np.arange(d_model)[np.newaxis, :], d_model)
    # Apply sin to even indices; 2i
    angle_rads[:, 0::2] = np.sin(angle_rads[:, 0::2])
    # Apply cos to odd indices; 2i+1
    angle_rads[:, 1::2] = np.cos(angle_rads[:, 1::2])
    pos_encoding = angle_rads[np.newaxis, ...]
    return tf.cast(pos_encoding, dtype=tf.float32)
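# The two functions above implement the sinusoidal positional encoding from "Attention Is All You Need":
# PE(pos, 2i) = sin(pos / 10000^(2i / d_model)) and PE(pos, 2i + 1) = cos(pos / 10000^(2i / d_model)).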
# Masks
def create_padding_mask(seq):
    seq = tf.cast(tf.math.equal(seq, 0), tf.float32)
    return seq[:, tf.newaxis, tf.newaxis, :]

def create_look_ahead_mask(size):
    mask = 1 - tf.linalg.band_part(tf.ones((size, size)), -1, 0)
    return mask
# Scaled Dot-Product Attention
def scaled_dot_product_attention(q, k, v, mask):
    matmul_qk = tf.matmul(q, k, transpose_b=True)
    dk = tf.cast(tf.shape(k)[-1], tf.float32)
    scaled_attention_logits = matmul_qk / tf.math.sqrt(dk)
    if mask is not None:
        scaled_attention_logits += (mask * -1e9)
    attention_weights = tf.nn.softmax(scaled_attention_logits, axis=-1)
    output = tf.matmul(attention_weights, v)
    return output, attention_weights
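# scaled_dot_product_attention above computes softmax(Q K^T / sqrt(d_k)) V;
# masked positions receive a large negative logit (-1e9) so they get ~0 weight after the softmax.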
# Multi-Head Attention Layer
class MultiHeadAttention(tf.keras.layers.Layer):
    def __init__(self, d_model, num_heads):
        super(MultiHeadAttention, self).__init__()
        self.num_heads = num_heads
        self.d_model = d_model
        assert d_model % self.num_heads == 0
        self.depth = d_model // self.num_heads
        self.wq = tf.keras.layers.Dense(d_model)
        self.wk = tf.keras.layers.Dense(d_model)
        self.wv = tf.keras.layers.Dense(d_model)
        self.dense = tf.keras.layers.Dense(d_model)

    def split_heads(self, x, batch_size):
        x = tf.reshape(x, (batch_size, -1, self.num_heads, self.depth))
        return tf.transpose(x, perm=[0, 2, 1, 3])

    def call(self, v, k, q, mask):
        batch_size = tf.shape(q)[0]
        q = self.wq(q)
        k = self.wk(k)
        v = self.wv(v)
        q = self.split_heads(q, batch_size)
        k = self.split_heads(k, batch_size)
        v = self.split_heads(v, batch_size)
        scaled_attention, attention_weights = scaled_dot_product_attention(q, k, v, mask)
        scaled_attention = tf.transpose(scaled_attention, perm=[0, 2, 1, 3])
        concat_attention = tf.reshape(scaled_attention, (batch_size, -1, self.d_model))
        output = self.dense(concat_attention)
        return output, attention_weights
# Feed Forward Layer
def point_wise_feed_forward_network(d_model, dff):
    return tf.keras.Sequential([
        tf.keras.layers.Dense(dff, activation='relu'),
        tf.keras.layers.Dense(d_model)
    ])
# Encoder Layer
class EncoderLayer(tf.keras.layers.Layer):
    def __init__(self, d_model, num_heads, dff, rate=0.1):
        super(EncoderLayer, self).__init__()
        self.mha = MultiHeadAttention(d_model, num_heads)
        self.ffn = point_wise_feed_forward_network(d_model, dff)
        self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = tf.keras.layers.Dropout(rate)
        self.dropout2 = tf.keras.layers.Dropout(rate)

    def call(self, x, training, mask):
        attn_output, _ = self.mha(x, x, x, mask)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(x + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        out2 = self.layernorm2(out1 + ffn_output)
        return out2
# Decoder Layer
class DecoderLayer(tf.keras.layers.Layer):
    def __init__(self, d_model, num_heads, dff, rate=0.1):
        super(DecoderLayer, self).__init__()
        self.mha1 = MultiHeadAttention(d_model, num_heads)
        self.mha2 = MultiHeadAttention(d_model, num_heads)
        self.ffn = point_wise_feed_forward_network(d_model, dff)
        self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.layernorm3 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = tf.keras.layers.Dropout(rate)
        self.dropout2 = tf.keras.layers.Dropout(rate)
        self.dropout3 = tf.keras.layers.Dropout(rate)

    def call(self, x, enc_output, training, look_ahead_mask, padding_mask):
        attn1, attn_weights_block1 = self.mha1(x, x, x, look_ahead_mask)
        attn1 = self.dropout1(attn1, training=training)
        out1 = self.layernorm1(attn1 + x)
        attn2, attn_weights_block2 = self.mha2(enc_output, enc_output, out1, padding_mask)
        attn2 = self.dropout2(attn2, training=training)
        out2 = self.layernorm2(attn2 + out1)
        ffn_output = self.ffn(out2)
        ffn_output = self.dropout3(ffn_output, training=training)
        out3 = self.layernorm3(ffn_output + out2)
        return out3, attn_weights_block1, attn_weights_block2
# Encoder
class Encoder(tf.keras.layers.Layer):
    def __init__(self, num_layers, d_model, num_heads, dff, input_vocab_size, maximum_position_encoding, rate=0.1):
        super(Encoder, self).__init__()
        self.d_model = d_model
        self.num_layers = num_layers
        self.embedding = tf.keras.layers.Embedding(input_vocab_size, d_model)
        self.pos_encoding = positional_encoding(maximum_position_encoding, self.d_model)
        self.enc_layers = [EncoderLayer(d_model, num_heads, dff, rate) for _ in range(num_layers)]
        self.dropout = tf.keras.layers.Dropout(rate)

    def call(self, x, training, enc_padding_mask):
        seq_len = tf.shape(x)[1]
        # Add embedding and position encoding
        x = self.embedding(x)  # (batch_size, input_seq_len, d_model)
        x *= tf.math.sqrt(tf.cast(self.d_model, tf.float32))
        x += self.pos_encoding[:, :seq_len, :]
        x = self.dropout(x, training=training)
        for i in range(self.num_layers):
            x = self.enc_layers[i](x, training=training, mask=enc_padding_mask)
        return x  # (batch_size, input_seq_len, d_model)
# Decoder
class Decoder(tf.keras.layers.Layer):
    def __init__(self, num_layers, d_model, num_heads, dff, target_vocab_size, maximum_position_encoding, rate=0.1):
        super(Decoder, self).__init__()
        self.d_model = d_model
        self.num_layers = num_layers
        self.embedding = tf.keras.layers.Embedding(target_vocab_size, d_model)
        self.pos_encoding = positional_encoding(maximum_position_encoding, d_model)
        self.dec_layers = [DecoderLayer(d_model, num_heads, dff, rate) for _ in range(num_layers)]
        self.dropout = tf.keras.layers.Dropout(rate)

    def call(self, x, enc_output, training, look_ahead_mask, dec_padding_mask):
        seq_len = tf.shape(x)[1]
        attention_weights = {}
        x = self.embedding(x)  # (batch_size, target_seq_len, d_model)
        x *= tf.math.sqrt(tf.cast(self.d_model, tf.float32))
        x += self.pos_encoding[:, :seq_len, :]
        x = self.dropout(x, training=training)
        for i in range(self.num_layers):
            x, block1, block2 = self.dec_layers[i](x, enc_output, training=training, look_ahead_mask=look_ahead_mask, padding_mask=dec_padding_mask)
            attention_weights[f'decoder_layer{i+1}_block1'] = block1
            attention_weights[f'decoder_layer{i+1}_block2'] = block2
        # x.shape == (batch_size, target_seq_len, d_model)
        return x, attention_weights
# Transformer
class Transformer(tf.keras.Model):
    def __init__(self, num_layers, d_model, num_heads, dff, input_vocab_size, target_vocab_size, pe_input, pe_target, rate=0.1):
        super(Transformer, self).__init__()
        self.encoder = Encoder(num_layers, d_model, num_heads, dff, input_vocab_size, pe_input, rate)
        self.decoder = Decoder(num_layers, d_model, num_heads, dff, target_vocab_size, pe_target, rate)
        self.final_layer = tf.keras.layers.Dense(target_vocab_size)

    def call(self, inp, tar, training, enc_padding_mask, look_ahead_mask, dec_padding_mask):
        # 'training' is passed as a keyword argument to encoder and decoder
        enc_output = self.encoder(inp, training=training, enc_padding_mask=enc_padding_mask)
        dec_output, attention_weights = self.decoder(
            tar, enc_output, training=training, look_ahead_mask=look_ahead_mask, dec_padding_mask=dec_padding_mask
        )
        final_output = self.final_layer(dec_output)
        return final_output, attention_weights
# Hyperparameters
num_layers = 4
d_model = 256
dff = 2048
num_heads = 8
dropout_rate = 0.1
# Initialize the Transformer
transformer = Transformer(
    num_layers=num_layers,
    d_model=d_model,
    num_heads=num_heads,
    dff=dff,
    input_vocab_size=encoder_vocab_size,
    target_vocab_size=decoder_vocab_size,
    pe_input=encoder_maxlen,
    pe_target=decoder_maxlen,
    rate=dropout_rate
)
# Optimizer
class CustomSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
    def __init__(self, d_model, warmup_steps=4000):
        super(CustomSchedule, self).__init__()
        self.d_model = tf.cast(d_model, tf.float32)
        self.warmup_steps = warmup_steps

    def __call__(self, step):
        step = tf.cast(step, tf.float32)  # Ensure step is float
        arg1 = tf.math.rsqrt(step)
        arg2 = step * (self.warmup_steps ** -1.5)
        # rsqrt(d_model), i.e. d_model**-0.5, as in the original Transformer schedule;
        # the posted sqrt(d_model) would scale the learning rate up by a factor of d_model.
        return tf.math.rsqrt(self.d_model) * tf.math.minimum(arg1, arg2)
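# CustomSchedule above is the warmup schedule from "Attention Is All You Need":
# lr(step) = d_model^-0.5 * min(step^-0.5, step * warmup_steps^-1.5),
# ramping up linearly for warmup_steps steps and then decaying as 1/sqrt(step).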
learning_rate = CustomSchedule(d_model)
optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=0.9, beta_2=0.98, epsilon=1e-9)
# Loss and Metrics
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')
def loss_function(real, pred):
    mask = tf.cast(tf.not_equal(real, 0), dtype=tf.float32)
    loss_ = loss_object(real, pred)
    loss_ *= mask
    return tf.reduce_sum(loss_) / tf.reduce_sum(mask)
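# In loss_function above, the mask zeroes out loss contributions from padding positions (token id 0),
# and the division averages the loss over real (non-padding) tokens only.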
train_loss = tf.keras.metrics.Mean(name='train_loss')
# Checkpoints
checkpoint_path = "./checkpoints/train"
ckpt = tf.train.Checkpoint(transformer=transformer, optimizer=optimizer)
ckpt_manager = tf.train.CheckpointManager(ckpt, checkpoint_path, max_to_keep=5)
if ckpt_manager.latest_checkpoint:
    ckpt.restore(ckpt_manager.latest_checkpoint)
    print('Latest checkpoint restored!!')
# Masks
def create_masks(inp, tar):
    enc_padding_mask = create_padding_mask(inp)
    dec_padding_mask = create_padding_mask(inp)
    look_ahead_mask = create_look_ahead_mask(tf.shape(tar)[1])
    dec_target_padding_mask = create_padding_mask(tar)
    combined_mask = tf.maximum(dec_target_padding_mask, look_ahead_mask)
    return enc_padding_mask, combined_mask, dec_padding_mask
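# In create_masks above, combined_mask merges the look-ahead mask with the target padding mask,
# so the decoder can attend neither to future positions nor to padded positions of the target;
# dec_padding_mask masks the encoder input padding for the decoder's cross-attention.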
# Training Step
@tf.function
def train_step(inp, tar):
    tar_inp = tar[:, :-1]  # Prepare target input
    tar_real = tar[:, 1:]  # Prepare target output
    enc_padding_mask, combined_mask, dec_padding_mask = create_masks(inp, tar_inp)
    with tf.GradientTape() as tape:
        predictions, _ = transformer(
            inp=inp,
            tar=tar_inp,
            training=True,
            enc_padding_mask=enc_padding_mask,
            look_ahead_mask=combined_mask,
            dec_padding_mask=dec_padding_mask
        )
        loss = loss_function(tar_real, predictions)
    gradients = tape.gradient(loss, transformer.trainable_variables)
    optimizer.apply_gradients(zip(gradients, transformer.trainable_variables))
    train_loss.update_state(loss)
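# Teacher forcing in train_step above: tar_inp drops the last token and tar_real drops the first,
# so the model is trained to predict token t+1 given tokens 0..t.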
# Training
EPOCHS = 2
for epoch in range(EPOCHS):
    start = time.time()
    train_loss.reset_state()
    for (batch, (inp, tar)) in enumerate(dataset):
        train_step(inp, tar)
        if batch % 100 == 0:
            print(f'Epoch {epoch + 1} Batch {batch} Loss {train_loss.result():.4f}')
    if (epoch + 1) % 5 == 0:
        ckpt_save_path = ckpt_manager.save()
        print(f'Saving checkpoint for epoch {epoch + 1} at {ckpt_save_path}')
    print(f'Epoch {epoch + 1} Loss {train_loss.result():.4f}')
    print(f'Time taken for 1 epoch: {time.time() - start:.2f} secs\n')
# Inference
def evaluate(inp_sentence):
    # Special tokens: '<go>' / '<stop>' are assumed here (the angle-bracket tokens were stripped
    # from the post) and must match the tokens added before fitting the summary tokenizer.
    start_token = [summary_tokenizer.word_index['<go>']]
    end_token = [summary_tokenizer.word_index['<stop>']]
    # Tokenize input sentence
    inp_sequence = document_tokenizer.texts_to_sequences([inp_sentence])
    inp_sequence = tf.keras.preprocessing.sequence.pad_sequences(inp_sequence, maxlen=encoder_maxlen, padding='post')
    inp_sequence = tf.convert_to_tensor(inp_sequence)
    # Initialize the decoder input with the start token
    decoder_input = tf.convert_to_tensor([start_token], dtype=tf.int32)
    for i in range(decoder_maxlen):
        enc_padding_mask, combined_mask, dec_padding_mask = create_masks(inp_sequence, decoder_input)
        # Predict next token
        predictions, attention_weights = transformer(
            inp_sequence,
            decoder_input,
            training=False,
            enc_padding_mask=enc_padding_mask,
            look_ahead_mask=combined_mask,
            dec_padding_mask=dec_padding_mask
        )
        predictions = predictions[:, -1, :]  # Take the last token
        predicted_id = tf.argmax(predictions, axis=-1).numpy()[0]
        # Stop if end token is predicted
        if predicted_id == summary_tokenizer.word_index['<stop>']:
            break
        # Append predicted token to the decoder input
        decoder_input = tf.concat([decoder_input, [[predicted_id]]], axis=-1)
    # Convert tokenized output back to text
    result = summary_tokenizer.sequences_to_texts(decoder_input.numpy())[0]
    return result.replace('<go>', '').replace('<stop>', '').strip()
# Function to summarize text
def summarize_text(text):
    summary = evaluate(text)
    return summary
# Example usage
sample_text = "As Asia’s largest technology forum, IMC will further strengthen India’s position as a global leader in digital transformation, fostering new opportunities to collaborate with global ecosystem,” Mr. Scindia said. Apart from the IMC, India hosted the 2024 edition of the World Telecommunication Standardization Assembly, organised by the International Telecommunication Union (ITU), simultaneously with the IMC at Delhi last year."
summary_result = summarize_text(sample_text)
print("Generated Summary:", summary_result)
I have tried asking ChatGPT and other resources, but the code still does not produce a correct summary.