You cannot select more than 25 topics.
Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
45 lines · 1.3 KiB
import os |
|
from pathlib import Path |
|
|
|
from xenua import LayeredObject, import_module |
|
|
|
# Translate the short size codes accepted in config ('s'/'m'/'l'/'xl')
# into the official GPT-2 checkpoint names (parameter counts).
gpt2_name_mapper = dict(
    s='124M',
    m='355M',
    l='774M',
    xl='1558M',
)
|
|
|
# Built-in configuration defaults.  These sit at the bottom of the lookup
# chain: environment variables and the user's config.py (if present) are
# consulted before these values.
defaults = {
    # Where downloaded/derived data lives (sibling "data" dir of the package).
    'datadir': Path(__file__).parent.parent / 'data',

    # --- post parsing ---
    # Hook for callers to drop posts by content; default excludes nothing.
    'parsing_arbitrary_exclude_fn': lambda content: False,
    'parsing_exclude_mentions': True,
    'parsed_posts_file': 'posts.txt',

    # --- tokenizer ---
    'tokenizer_output_prefix': 'fedibooks',

    # --- model ---
    'model_size': 's',  # s/m/l/xl, meaning 124M, 355M, 774M, and 1558M, respectively
    'model_folder': 'trained_model',
    'use_gpu': False,  # todo: implement

    # --- training ---
    'prompt_before_training': True,
    'training_block_size': 64,
    'training_num_workers': 4,  # seems to have no effect
    'training_batch_size': 8,  # no clue what this does, docs kinda suck
    'training_num_steps': 50000,
    'training_sample_frequency': 5000,
    'training_save_frequency': 5000,

    # --- generation ---
    'generation_zwsp_mentions': True,
    'generation_prompt': None,
    'generation_include_prompt': False,
    'generation_max_length': 400,
    'generation_temperature': 0.7,
}
|
|
|
# Optional user overrides: a config.py sitting next to the package root.
# Start from None (no user layer) and only replace it if the module loads.
user_config = None
try:
    user_config = import_module(Path(__file__).parent.parent / 'config.py')
except ModuleNotFoundError:
    pass

# Layered config lookup.  NOTE(review): assumes xenua.LayeredObject resolves
# attributes left-to-right (env vars, then user config, then defaults) —
# confirm against xenua's docs.
c = LayeredObject(os.environ, user_config, defaults)

# Environment variables arrive as plain strings, so normalise datadir to a
# Path, then make sure the directory actually exists.
c.datadir = Path(c.datadir)
c.datadir.mkdir(parents=True, exist_ok=True)
|
|
|