You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 

45 lines
1.3 KiB

import os
from pathlib import Path
from xenua import LayeredObject, import_module
# Translates the short `model_size` codes (s/m/l/xl) into the corresponding
# size labels (124M/355M/774M/1558M).
gpt2_name_mapper = dict(
    s='124M',
    m='355M',
    l='774M',
    xl='1558M',
)
# Built-in fallback values for every configuration key.
# Keys are grouped by the stage they belong to (by their name prefix).
defaults = {
    # --- storage ---
    # `data` directory located two levels above this file.
    'datadir': Path(__file__).parent.parent / 'data',

    # --- parsing ---
    # Callable taking the post content; the default always returns False.
    'parsing_arbitrary_exclude_fn': lambda content: False,
    'parsing_exclude_mentions': True,
    'parsed_posts_file': 'posts.txt',

    # --- tokenizer / model ---
    'tokenizer_output_prefix': 'fedibooks',
    # One of s/m/l/xl, i.e. 124M, 355M, 774M and 1558M respectively.
    'model_size': 's',
    'model_folder': 'trained_model',
    # TODO: not implemented yet.
    'use_gpu': False,

    # --- training ---
    'prompt_before_training': True,
    'training_block_size': 64,
    # Seems to have no effect.
    'training_num_workers': 4,
    # Exact effect unclear; the upstream docs are sparse.
    'training_batch_size': 8,
    'training_num_steps': 50000,
    'training_sample_frequency': 5000,
    'training_save_frequency': 5000,

    # --- generation ---
    'generation_zwsp_mentions': True,
    'generation_prompt': None,
    'generation_include_prompt': False,
    'generation_max_length': 400,
    'generation_temperature': 0.7,
}
# Optional user overrides: try to load `config.py` from two levels above this
# file. If the import raises ModuleNotFoundError, `user_config` stays None.
user_config = None
try:
    user_config = import_module(Path(__file__).parent.parent / 'config.py')
except ModuleNotFoundError:
    # No user config module could be imported; continue without one.
    pass
# Effective configuration object built from three layers: the process
# environment, the optional user config module (None when it could not be
# imported), and the built-in defaults.
# NOTE(review): presumably earlier arguments take precedence over later ones —
# confirm against xenua's LayeredObject.
c = LayeredObject(os.environ, user_config, defaults)
# Normalise `datadir` to a Path; a value sourced from os.environ would be a
# plain string rather than a Path.
c.datadir = Path(c.datadir)
# Ensure the data directory exists at import time: missing parents are
# created, and an already-existing directory is not an error.
c.datadir.mkdir(parents=True, exist_ok=True)