Rename flaubert2_configuration.py to jargon_configuration.py
flaubert2_configuration.py → jargon_configuration.py (RENAMED)
@@ -1,10 +1,13 @@
 
 from transformers.models.roberta.modeling_roberta import RobertaConfig
 
-class Flaubert2Config(RobertaConfig):
-    model_type = "flaubert2"
 
-
+class JargonConfig(RobertaConfig):
+    model_type = "jargon"
+
+    def __init__(
+        self,
+        compress_layer= 1,
         shared_layer_kv_compressed=1,
         shared_kv_compressed=0,
         max_positions=512,
@@ -38,10 +41,11 @@ class Flaubert2Config(RobertaConfig):
         add_pooling_layer=False,
         intermediate_size=4096,
         intermediate_act_fn="relu",
-        hidden_act
+        hidden_act="relu",
         output_hidden_states=False,
         position_embedding_type="learned",
-        **kwargs
+        **kwargs
+    ):
         super().__init__(**kwargs)
 
         self.add_pooling_layer = add_pooling_layer
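For orientation, a minimal usage sketch of the renamed class, assuming jargon_configuration.py is importable from the working directory. The AutoConfig.register call and the example keyword values are illustrative assumptions, not part of this commit, which only renames the file and the class.

from transformers import AutoConfig

from jargon_configuration import JargonConfig

# Register the config under its model_type so AutoConfig can resolve "jargon".
# (An assumption for this sketch; the repo may register it elsewhere or rely
# on trust_remote_code when loading from the Hub.)
AutoConfig.register("jargon", JargonConfig)

# The keywords mirror the __init__ signature shown in the diff; any other
# RobertaConfig keyword is forwarded to super().__init__() via **kwargs.
config = JargonConfig(
    compress_layer=1,
    shared_layer_kv_compressed=1,
    shared_kv_compressed=0,
    max_positions=512,
    hidden_act="relu",
)
print(config.model_type)  # -> jargon

Because JargonConfig subclasses RobertaConfig, every standard RoBERTa field (hidden_size, num_attention_heads, vocab_size, and so on) is still present on the instance alongside the compression-specific parameters shown in the diff.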