mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-11 17:44:38 +00:00
merge checkpoint 2 - functional merge without q4_0_4_4 (need regen shaders)
This commit is contained in:
commit
de64b9198c
218 changed files with 175736 additions and 49778 deletions
|
@ -37,14 +37,14 @@ struct DescriptorSetAndBinding;
|
|||
// provides methods for registering optimization passes and optimizing.
|
||||
//
|
||||
// Instances of this class provide a basic thread-safety guarantee.
|
||||
class Optimizer {
|
||||
class SPIRV_TOOLS_EXPORT Optimizer {
|
||||
public:
|
||||
// The token for an optimization pass. It is returned via one of the
|
||||
// Create*Pass() standalone functions at the end of this header file and
|
||||
// consumed by the RegisterPass() method. Tokens are one-time objects that
|
||||
// only support move; copying is not allowed.
|
||||
struct PassToken {
|
||||
struct Impl; // Opaque struct for holding internal data.
|
||||
struct SPIRV_TOOLS_LOCAL Impl; // Opaque struct for holding internal data.
|
||||
|
||||
PassToken(std::unique_ptr<Impl>);
|
||||
|
||||
|
@ -97,12 +97,20 @@ class Optimizer {
|
|||
// Registers passes that attempt to improve performance of generated code.
|
||||
// This sequence of passes is subject to constant review and will change
|
||||
// from time to time.
|
||||
//
|
||||
// If |preserve_interface| is true, all non-io variables in the entry point
|
||||
// interface are considered live and are not eliminated.
|
||||
Optimizer& RegisterPerformancePasses();
|
||||
Optimizer& RegisterPerformancePasses(bool preserve_interface);
|
||||
|
||||
// Registers passes that attempt to improve the size of generated code.
|
||||
// This sequence of passes is subject to constant review and will change
|
||||
// from time to time.
|
||||
//
|
||||
// If |preserve_interface| is true, all non-io variables in the entry point
|
||||
// interface are considered live and are not eliminated.
|
||||
Optimizer& RegisterSizePasses();
|
||||
Optimizer& RegisterSizePasses(bool preserve_interface);
|
||||
|
||||
// Registers passes that attempt to legalize the generated code.
|
||||
//
|
||||
|
@ -112,7 +120,11 @@ class Optimizer {
|
|||
//
|
||||
// This sequence of passes is subject to constant review and will change
|
||||
// from time to time.
|
||||
//
|
||||
// If |preserve_interface| is true, all non-io variables in the entry point
|
||||
// interface are considered live and are not eliminated.
|
||||
Optimizer& RegisterLegalizationPasses();
|
||||
Optimizer& RegisterLegalizationPasses(bool preserve_interface);
|
||||
|
||||
// Register passes specified in the list of |flags|. Each flag must be a
|
||||
// string of a form accepted by Optimizer::FlagHasValidForm().
|
||||
|
@ -121,8 +133,13 @@ class Optimizer {
|
|||
// error message is emitted to the MessageConsumer object (use
|
||||
// Optimizer::SetMessageConsumer to define a message consumer, if needed).
|
||||
//
|
||||
// If |preserve_interface| is true, all non-io variables in the entry point
|
||||
// interface are considered live and are not eliminated.
|
||||
//
|
||||
// If all the passes are registered successfully, it returns true.
|
||||
bool RegisterPassesFromFlags(const std::vector<std::string>& flags);
|
||||
bool RegisterPassesFromFlags(const std::vector<std::string>& flags,
|
||||
bool preserve_interface);
|
||||
|
||||
// Registers the optimization pass associated with |flag|. This only accepts
|
||||
// |flag| values of the form "--pass_name[=pass_args]". If no such pass
|
||||
|
@ -139,7 +156,11 @@ class Optimizer {
|
|||
//
|
||||
// --legalize-hlsl: Registers all passes that legalize SPIR-V generated by an
|
||||
// HLSL front-end.
|
||||
//
|
||||
// If |preserve_interface| is true, all non-io variables in the entry point
|
||||
// interface are considered live and are not eliminated.
|
||||
bool RegisterPassFromFlag(const std::string& flag);
|
||||
bool RegisterPassFromFlag(const std::string& flag, bool preserve_interface);
|
||||
|
||||
// Validates that |flag| has a valid format. Strings accepted:
|
||||
//
|
||||
|
@ -218,7 +239,7 @@ class Optimizer {
|
|||
Optimizer& SetValidateAfterAll(bool validate);
|
||||
|
||||
private:
|
||||
struct Impl; // Opaque struct for holding internal data.
|
||||
struct SPIRV_TOOLS_LOCAL Impl; // Opaque struct for holding internal data.
|
||||
std::unique_ptr<Impl> impl_; // Unique pointer to internal data.
|
||||
};
|
||||
|
||||
|
@ -726,64 +747,6 @@ Optimizer::PassToken CreateReduceLoadSizePass(
|
|||
// them into a single instruction where possible.
|
||||
Optimizer::PassToken CreateCombineAccessChainsPass();
|
||||
|
||||
// Create a pass to instrument bindless descriptor checking
|
||||
// This pass instruments all bindless references to check that descriptor
|
||||
// array indices are inbounds, and if the descriptor indexing extension is
|
||||
// enabled, that the descriptor has been initialized. If the reference is
|
||||
// invalid, a record is written to the debug output buffer (if space allows)
|
||||
// and a null value is returned. This pass is designed to support bindless
|
||||
// validation in the Vulkan validation layers.
|
||||
//
|
||||
// TODO(greg-lunarg): Add support for buffer references. Currently only does
|
||||
// checking for image references.
|
||||
//
|
||||
// Dead code elimination should be run after this pass as the original,
|
||||
// potentially invalid code is not removed and could cause undefined behavior,
|
||||
// including crashes. It may also be beneficial to run Simplification
|
||||
// (ie Constant Propagation), DeadBranchElim and BlockMerge after this pass to
|
||||
// optimize instrument code involving the testing of compile-time constants.
|
||||
// It is also generally recommended that this pass (and all
|
||||
// instrumentation passes) be run after any legalization and optimization
|
||||
// passes. This will give better analysis for the instrumentation and avoid
|
||||
// potentially de-optimizing the instrument code, for example, inlining
|
||||
// the debug record output function throughout the module.
|
||||
//
|
||||
// The instrumentation will read and write buffers in debug
|
||||
// descriptor set |desc_set|. It will write |shader_id| in each output record
|
||||
// to identify the shader module which generated the record.
|
||||
// |desc_length_enable| controls instrumentation of runtime descriptor array
|
||||
// references, |desc_init_enable| controls instrumentation of descriptor
|
||||
// initialization checking, and |buff_oob_enable| controls instrumentation
|
||||
// of storage and uniform buffer bounds checking, all of which require input
|
||||
// buffer support. |texbuff_oob_enable| controls instrumentation of texel
|
||||
// buffers, which does not require input buffer support.
|
||||
Optimizer::PassToken CreateInstBindlessCheckPass(
|
||||
uint32_t desc_set, uint32_t shader_id, bool desc_length_enable = false,
|
||||
bool desc_init_enable = false, bool buff_oob_enable = false,
|
||||
bool texbuff_oob_enable = false);
|
||||
|
||||
// Create a pass to instrument physical buffer address checking
|
||||
// This pass instruments all physical buffer address references to check that
|
||||
// all referenced bytes fall in a valid buffer. If the reference is
|
||||
// invalid, a record is written to the debug output buffer (if space allows)
|
||||
// and a null value is returned. This pass is designed to support buffer
|
||||
// address validation in the Vulkan validation layers.
|
||||
//
|
||||
// Dead code elimination should be run after this pass as the original,
|
||||
// potentially invalid code is not removed and could cause undefined behavior,
|
||||
// including crashes. Instruction simplification would likely also be
|
||||
// beneficial. It is also generally recommended that this pass (and all
|
||||
// instrumentation passes) be run after any legalization and optimization
|
||||
// passes. This will give better analysis for the instrumentation and avoid
|
||||
// potentially de-optimizing the instrument code, for example, inlining
|
||||
// the debug record output function throughout the module.
|
||||
//
|
||||
// The instrumentation will read and write buffers in debug
|
||||
// descriptor set |desc_set|. It will write |shader_id| in each output record
|
||||
// to identify the shader module which generated the record.
|
||||
Optimizer::PassToken CreateInstBuffAddrCheckPass(uint32_t desc_set,
|
||||
uint32_t shader_id);
|
||||
|
||||
// Create a pass to instrument OpDebugPrintf instructions.
|
||||
// This pass replaces all OpDebugPrintf instructions with instructions to write
|
||||
// a record containing the string id and all the specified values into a special
|
||||
|
@ -864,14 +827,19 @@ Optimizer::PassToken CreateReplaceDescArrayAccessUsingVarIndexPass();
|
|||
|
||||
// Create descriptor scalar replacement pass.
|
||||
// This pass replaces every array variable |desc| that has a DescriptorSet and
|
||||
// Binding decorations with a new variable for each element of the array.
|
||||
// Suppose |desc| was bound at binding |b|. Then the variable corresponding to
|
||||
// |desc[i]| will have binding |b+i|. The descriptor set will be the same. It
|
||||
// is assumed that no other variable already has a binding that will used by one
|
||||
// of the new variables. If not, the pass will generate invalid Spir-V. All
|
||||
// accesses to |desc| must be OpAccessChain instructions with a literal index
|
||||
// for the first index.
|
||||
// Binding decorations with a new variable for each element of the
|
||||
// array/composite. Suppose |desc| was bound at binding |b|. Then the variable
|
||||
// corresponding to |desc[i]| will have binding |b+i|. The descriptor set will
|
||||
// be the same. It is assumed that no other variable already has a binding that
|
||||
// will be used by one of the new variables. If not, the pass will generate
|
||||
// invalid Spir-V. All accesses to |desc| must be OpAccessChain instructions
|
||||
// with a literal index for the first index. This variant flattens both
|
||||
// composites and arrays.
|
||||
Optimizer::PassToken CreateDescriptorScalarReplacementPass();
|
||||
// This variant flattens only composites.
|
||||
Optimizer::PassToken CreateDescriptorCompositeScalarReplacementPass();
|
||||
// This variant flattens only arrays.
|
||||
Optimizer::PassToken CreateDescriptorArrayScalarReplacementPass();
|
||||
|
||||
// Create a pass to replace each OpKill instruction with a function call to a
|
||||
// function that has a single OpKill. Also replace each OpTerminateInvocation
|
||||
|
@ -893,6 +861,12 @@ Optimizer::PassToken CreateAmdExtToKhrPass();
|
|||
// propagated into their final positions.
|
||||
Optimizer::PassToken CreateInterpolateFixupPass();
|
||||
|
||||
// Replace OpExtInst instructions with OpExtInstWithForwardRefsKHR when
|
||||
// the instruction contains a forward reference to another debug instruction.
|
||||
// Replace OpExtInstWithForwardRefsKHR with OpExtInst when there are no forward
|
||||
// references to another debug instruction.
|
||||
Optimizer::PassToken CreateOpExtInstWithForwardReferenceFixupPass();
|
||||
|
||||
// Removes unused components from composite input variables. Current
|
||||
// implementation just removes trailing unused components from input arrays
|
||||
// and structs. The pass performs best after maximizing dead code removal.
|
||||
|
@ -971,6 +945,41 @@ Optimizer::PassToken CreateRemoveDontInlinePass();
|
|||
// object, currently the pass would remove accesschain pointer argument passed
|
||||
// to the function
|
||||
Optimizer::PassToken CreateFixFuncCallArgumentsPass();
|
||||
|
||||
// Creates a trim-capabilities pass.
|
||||
// This pass removes unused capabilities for a given module, and if possible,
|
||||
// associated extensions.
|
||||
// See `trim_capabilities.h` for the list of supported capabilities.
|
||||
//
|
||||
// If the module contains unsupported capabilities, this pass will ignore them.
|
||||
// This should be fine in most cases, but could lead to incorrect results if
|
||||
// the unknown capability interacts with one of the trimmed capabilities.
|
||||
Optimizer::PassToken CreateTrimCapabilitiesPass();
|
||||
|
||||
// Creates a struct-packing pass.
|
||||
// This pass re-assigns all offset layout decorators to tightly pack
|
||||
// the struct with OpName matching `structToPack` according to the given packing
|
||||
// rule. Accepted packing rules are: std140, std140EnhancedLayout, std430,
|
||||
// std430EnhancedLayout, hlslCbuffer, hlslCbufferPackOffset, scalar,
|
||||
// scalarEnhancedLayout.
|
||||
Optimizer::PassToken CreateStructPackingPass(const char* structToPack,
|
||||
const char* packingRule);
|
||||
|
||||
// Creates a switch-descriptorset pass.
|
||||
// This pass changes any DescriptorSet decorations with the value |ds_from| to
|
||||
// use the new value |ds_to|.
|
||||
Optimizer::PassToken CreateSwitchDescriptorSetPass(uint32_t ds_from,
|
||||
uint32_t ds_to);
|
||||
|
||||
// Creates an invocation interlock placement pass.
|
||||
// This pass ensures that an entry point will have at most one
|
||||
// OpBeginInterlockInvocationEXT and one OpEndInterlockInvocationEXT, in that
|
||||
// order.
|
||||
Optimizer::PassToken CreateInvocationInterlockPlacementPass();
|
||||
|
||||
// Creates a pass to add/remove maximal reconvergence execution mode.
|
||||
// This pass either adds or removes maximal reconvergence from all entry points.
|
||||
Optimizer::PassToken CreateModifyMaximalReconvergencePass(bool add);
|
||||
} // namespace spvtools
|
||||
|
||||
#endif // INCLUDE_SPIRV_TOOLS_OPTIMIZER_HPP_
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue