fixes
modeling_deberta.py  CHANGED  (+0 -34)
@@ -1376,11 +1376,6 @@ class DebertaV2LMPredictionHead(nn.Module):
         # an output-only bias for each token.
         self.decoder = nn.Linear(self.embedding_size, config.vocab_size, bias=True)
 
-        #self.bias = nn.Parameter(torch.zeros(config.vocab_size))
-
-        # Need a link between the two variables so that the bias is correctly resized with `resize_token_embeddings`
-        #self.decoder.bias = self.bias
-
     def forward(self, hidden_states):
         hidden_states = self.transform(hidden_states)
         hidden_states = self.decoder(hidden_states)
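Context for the hunk above: the decoder is now built with `bias=True`, so the output-only bias is owned by the `nn.Linear` layer itself and is created, saved, and resized together with its weight (e.g. by `resize_token_embeddings`), which is why the commented-out standalone `self.bias` Parameter and the manual `self.decoder.bias = self.bias` tying could be dropped. A minimal runnable sketch of that shape follows; `LMPredictionHeadSketch`, the `nn.Identity()` stand-in for the real transform block, and the toy sizes are illustrative assumptions, not the library's code.

# A minimal sketch (not the library's code) of the prediction head after this
# change: the output-only bias lives directly on `decoder.bias` because the
# Linear layer is created with bias=True, so no separate bias Parameter or
# manual tying is needed.
import torch
from torch import nn


class LMPredictionHeadSketch(nn.Module):
    def __init__(self, embedding_size: int, vocab_size: int):
        super().__init__()
        self.transform = nn.Identity()  # stand-in for the real transform block
        # bias=True: the decoder carries its own output bias of shape (vocab_size,)
        self.decoder = nn.Linear(embedding_size, vocab_size, bias=True)

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        hidden_states = self.transform(hidden_states)
        return self.decoder(hidden_states)


if __name__ == "__main__":
    head = LMPredictionHeadSketch(embedding_size=8, vocab_size=16)
    logits = head(torch.randn(2, 4, 8))
    print(logits.shape)             # torch.Size([2, 4, 16])
    print(head.decoder.bias.shape)  # torch.Size([16]) -- the output-only bias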
@@ -1398,13 +1393,6 @@ class DebertaV2OnlyMLMHead(nn.Module):
         return prediction_scores
 
 
-@add_start_docstrings(
-    """
-    DeBERTa Model transformer with a sequence classification/regression head on top (a linear layer on top of the
-    pooled output) e.g. for GLUE tasks.
-    """,
-    DEBERTA_START_DOCSTRING,
-)
 class DebertaV2ForSequenceClassification(DebertaV2PreTrainedModel):
     def __init__(self, config):
         super().__init__(config)
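Context for this and the following hunks that drop `@add_start_docstrings(...)`: the decorator only prepends shared docstring fragments (here `DEBERTA_START_DOCSTRING`) to the class's `__doc__`, so removing it changes the generated documentation but not runtime behavior. Below is a minimal sketch of that decorator pattern; the exact body, `add_start_docstrings_sketch`, `DEMO_START_DOCSTRING`, and the demo class are assumptions for illustration, not transformers' actual source.

# A minimal sketch (an assumption, not the library's exact source) of the
# decorator pattern behind add_start_docstrings: it prepends the given
# docstring fragments to the decorated object's __doc__.
def add_start_docstrings_sketch(*docstr):
    def decorator(obj):
        obj.__doc__ = "".join(docstr) + (obj.__doc__ or "")
        return obj
    return decorator


DEMO_START_DOCSTRING = "Shared intro paragraph describing the base model.\n"


@add_start_docstrings_sketch(
    "Model with a sequence classification head on top.\n",
    DEMO_START_DOCSTRING,
)
class DemoForSequenceClassification:
    """Class-specific details go here."""


# Prints the prepended fragments followed by the class's own docstring.
print(DemoForSequenceClassification.__doc__)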
@@ -1517,14 +1505,6 @@ class DebertaV2ForSequenceClassification(DebertaV2PreTrainedModel):
         )
 
 
-@add_start_docstrings(
-    """
-    DeBERTa Model with a token classification head on top (a linear layer on top of the hidden-states output) e.g. for
-    Named-Entity-Recognition (NER) tasks.
-    """,
-    DEBERTA_START_DOCSTRING,
-)
-# Copied from transformers.models.deberta.modeling_deberta.DebertaForTokenClassification with Deberta->DebertaV2
 class DebertaV2ForTokenClassification(DebertaV2PreTrainedModel):
     def __init__(self, config):
         super().__init__(config)
@@ -1591,13 +1571,6 @@ class DebertaV2ForTokenClassification(DebertaV2PreTrainedModel):
         )
 
 
-@add_start_docstrings(
-    """
-    DeBERTa Model with a span classification head on top for extractive question-answering tasks like SQuAD (a linear
-    layers on top of the hidden-states output to compute `span start logits` and `span end logits`).
-    """,
-    DEBERTA_START_DOCSTRING,
-)
 class DebertaV2ForQuestionAnswering(DebertaV2PreTrainedModel):
     def __init__(self, config):
         super().__init__(config)
@@ -1691,13 +1664,6 @@ class DebertaV2ForQuestionAnswering(DebertaV2PreTrainedModel):
         )
 
 
-@add_start_docstrings(
-    """
-    DeBERTa Model with a multiple choice classification head on top (a linear layer on top of the pooled output and a
-    softmax) e.g. for RocStories/SWAG tasks.
-    """,
-    DEBERTA_START_DOCSTRING,
-)
 class DebertaV2ForMultipleChoice(DebertaV2PreTrainedModel):
     def __init__(self, config):
         super().__init__(config)