@@ -1416,7 +1416,11 @@ class UnimerMBartDecoder(UnimerMBartPreTrainedModel):
             raise ValueError("You have to specify either decoder_input_ids or decoder_inputs_embeds")
 
         # past_key_values_length
-        past_key_values_length = past_key_values[0][0].shape[2] if past_key_values is not None else 0
+        # past_key_values_length = past_key_values[0][0].shape[2] if past_key_values is not None else 0
+        past_key_values_length = 0
+        if past_key_values is not None:
+            if isinstance(past_key_values, (list, tuple)) and past_key_values:
+                past_key_values_length = past_key_values[0][0].shape[2]
 
         if inputs_embeds is None:
             inputs_embeds = self.embed_tokens(input_ids)
@@ -1501,7 +1505,12 @@ class UnimerMBartDecoder(UnimerMBartPreTrainedModel):
                 if dropout_probability < self.layerdrop:
                     continue
 
-            past_key_value = past_key_values[idx] if past_key_values is not None else None
+            # past_key_value = past_key_values[idx] if past_key_values is not None else None
+            past_key_value = past_key_values[idx] if (
+                past_key_values is not None and
+                isinstance(past_key_values, (list, tuple)) and
+                idx < len(past_key_values)
+            ) else None
 
             if self.gradient_checkpointing and self.training:
                 layer_outputs = self._gradient_checkpointing_func(