#pragma once

/// Parameters handed to the model loader.
///
/// Every member is const, so an instance has to be fully populated when it is
/// created (e.g. by aggregate initialization) and cannot be modified later.
/// NOTE(review): member order and types are kept exactly as declared; any
/// external binding that mirrors this layout would depend on it - confirm
/// before reordering.
struct load_model_inputs
{
    int const threads;              ///< Requested thread count.
    int const max_context_length;   ///< Context length limit (presumably in tokens - confirm).
    int const batch_size;           ///< Batch size (presumably for prompt processing - confirm).
    bool const f16_kv;              ///< Presumably selects a 16-bit float KV cache - confirm.
    char const *model_filename;     ///< Path of the model file; the struct does not own the string.
    int const n_parts_overwrite{-1}; ///< Defaults to -1 (presumably "no override" - confirm).
    bool const use_mmap;            ///< Presumably enables memory-mapped loading - confirm.
};
/// Parameters for a single text generation request.
///
/// All members are const: the struct must be fully populated at creation time
/// and is read-only afterwards. Member order and types match the original
/// declaration exactly.
struct generation_inputs
{
    int const seed;                 ///< Seed value (presumably for the sampler RNG - confirm).
    char const *prompt;             ///< Prompt text; the struct does not own the string.
    int const max_context_length;   ///< Context length limit (presumably in tokens - confirm).
    int const max_length;           ///< Length limit (presumably max tokens to generate - confirm).
    float const temperature;        ///< Sampling temperature.
    int const top_k;                ///< Top-k sampling parameter.
    float const top_p;              ///< Top-p sampling parameter.
    float const rep_pen;            ///< Presumably the repetition penalty factor - confirm.
    int const rep_pen_range;        ///< Presumably how far back the penalty applies - confirm.
};
/// Result of a generation request.
///
/// A default-constructed instance has status == -1 and an empty,
/// NUL-terminated text buffer, so it is safe to read even if the producer
/// returns before writing anything.
struct generation_outputs
{
    /// Result code; starts at -1 until a producer overwrites it.
    int status = -1;

    /// Fixed 16384-byte response buffer. It is value-initialized (all zero) so
    /// it never exposes indeterminate bytes. Anything written here must fit
    /// within the buffer, terminator included.
    char text[16384] = {};
};