Fixed stopper tokens, fixed BLAS mode for GPT2 and GPT-J, updated Kobold Lite

This commit is contained in:
Concedo 2023-04-16 21:54:18 +08:00
parent 6548d3b3fb
commit c757fbee1d
6 changed files with 17 additions and 14 deletions

View file

@ -453,7 +453,7 @@ if __name__ == '__main__':
default_threads = (physical_core_limit if physical_core_limit<=3 else max(3,physical_core_limit-1))
parser.add_argument("--threads", help="Use a custom number of threads if specified. Otherwise, uses an amount based on CPU cores", type=int, default=default_threads)
parser.add_argument("--psutil_set_threads", help="Experimental flag. If set, uses psutils to determine thread count based on physical cores.", action='store_true')
parser.add_argument("--blasbatchsize", help="Sets the batch size used in BLAS processing (default 512)", type=int,choices=[128,256,512,1024], default=512)
parser.add_argument("--blasbatchsize", help="Sets the batch size used in BLAS processing (default 512)", type=int,choices=[64,128,256,512,1024], default=512)
parser.add_argument("--stream", help="Uses pseudo streaming", action='store_true')
parser.add_argument("--smartcontext", help="Reserving a portion of context to try processing less frequently.", action='store_true')
parser.add_argument("--nommap", help="If set, do not use mmap to load newer models", action='store_true')