# coding=utf-8
# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" RoBERTa configuration """

from __future__ import (absolute_import, division, print_function,
                        unicode_literals)

import logging

# json, sys, io.open and PretrainedConfig are needed only by the commented-out
# legacy implementation kept at the bottom of this file.
import json
import sys
from io import open

from .configuration_bert import BertConfig
from .configuration_utils import PretrainedConfig

logger = logging.getLogger(__name__)

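# Shortcut model names mapped to the URLs of their pretrained config files;
# PretrainedConfig.from_pretrained() resolves shortcut names through this map
# via the `pretrained_config_archive_map` class attribute below.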
ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP = {
    'roberta-base': "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-base-config.json",
    'roberta-large': "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-config.json",
    'roberta-large-mnli': "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-mnli-config.json",
}


class RobertaConfig(BertConfig):
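    """Configuration class for `RobertaModel`.

    RoBERTa reuses BERT's architecture, so this class subclasses
    :class:`~pytorch_transformers.BertConfig` unchanged and only redirects
    the pretrained archive map to the RoBERTa config files.
    """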
    pretrained_config_archive_map = ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP

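
# A minimal usage sketch (assuming this module is importable as part of
# pytorch_transformers; 'roberta-base' and the 50265-entry vocabulary are the
# published RoBERTa values, shown here purely for illustration):
#
#     from pytorch_transformers import RobertaConfig
#
#     # Resolve the shortcut name through ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP
#     # above and download the JSON config file.
#     config = RobertaConfig.from_pretrained('roberta-base')
#
#     # Or build a config by hand; all constructor arguments are inherited
#     # from BertConfig.
#     config = RobertaConfig(vocab_size_or_config_json_file=50265)

# The standalone implementation below predates the BertConfig subclass above
# and is kept commented out for reference.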
# class RobertaConfig(PretrainedConfig):
#     r"""
#     :class:`~pytorch_transformers.RobertaConfig` is the configuration class to store the configuration of a
#     `RobertaModel`.
#
#     Arguments:
#         vocab_size_or_config_json_file: Vocabulary size of `input_ids` in `RobertaModel`.
#         hidden_size: Size of the encoder layers and the pooler layer.
#         num_hidden_layers: Number of hidden layers in the Transformer encoder.
#         num_attention_heads: Number of attention heads for each attention layer in
#             the Transformer encoder.
#         intermediate_size: The size of the "intermediate" (i.e., feed-forward)
#             layer in the Transformer encoder.
#         hidden_act: The non-linear activation function (function or string) in the
#             encoder and pooler. If string, "gelu", "relu" and "swish" are supported.
#         hidden_dropout_prob: The dropout probability for all fully connected
#             layers in the embeddings, encoder, and pooler.
#         attention_probs_dropout_prob: The dropout ratio for the attention
#             probabilities.
#         max_position_embeddings: The maximum sequence length that this model might
#             ever be used with. Typically set this to something large just in case
#             (e.g., 512 or 1024 or 2048).
#         type_vocab_size: The vocabulary size of the `token_type_ids` passed into
#             `RobertaModel`.
#         initializer_range: The stddev of the truncated_normal_initializer for
#             initializing all weight matrices.
#         layer_norm_eps: The epsilon used by LayerNorm.
#     """
#     pretrained_config_archive_map = ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP
#
#     def __init__(self,
#                  vocab_size_or_config_json_file=30522,
#                  hidden_size=768,
#                  num_hidden_layers=12,
#                  num_attention_heads=12,
#                  intermediate_size=3072,
#                  hidden_act="gelu",
#                  hidden_dropout_prob=0.1,
#                  attention_probs_dropout_prob=0.1,
#                  max_position_embeddings=512,
#                  type_vocab_size=2,
#                  initializer_range=0.02,
#                  layer_norm_eps=1e-12,
#                  padding_idx=1,
#                  **kwargs):
#         super(RobertaConfig, self).__init__(**kwargs)
#         # A string (or Python 2 unicode) first argument is treated as the path
#         # to a JSON config file; an int is treated as the vocabulary size.
#         if isinstance(vocab_size_or_config_json_file, str) or (sys.version_info[0] == 2
#                 and isinstance(vocab_size_or_config_json_file, unicode)):
#             with open(vocab_size_or_config_json_file, "r", encoding='utf-8') as reader:
#                 json_config = json.loads(reader.read())
#             for key, value in json_config.items():
#                 self.__dict__[key] = value
#         elif isinstance(vocab_size_or_config_json_file, int):
#             self.vocab_size = vocab_size_or_config_json_file
#             self.hidden_size = hidden_size
#             self.num_hidden_layers = num_hidden_layers
#             self.num_attention_heads = num_attention_heads
#             self.hidden_act = hidden_act
#             self.intermediate_size = intermediate_size
#             self.hidden_dropout_prob = hidden_dropout_prob
#             self.attention_probs_dropout_prob = attention_probs_dropout_prob
#             self.max_position_embeddings = max_position_embeddings
#             self.type_vocab_size = type_vocab_size
#             self.initializer_range = initializer_range
#             self.layer_norm_eps = layer_norm_eps
#             self.padding_idx = padding_idx
#         else:
#             raise ValueError("First argument must be either a vocabulary size (int)"
#                              " or the path to a pretrained model config file (str)")