mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-04-28 03:30:20 +00:00
remove clblast, part 2
This commit is contained in:
parent
7f485e5287
commit
5c6cc02985
64 changed files with 4 additions and 30034 deletions
4
Makefile
4
Makefile
|
|
@ -55,8 +55,8 @@ ifdef KCPP_SANITIZE
|
|||
CFLAGS += -fsanitize=undefined -fsanitize-undefined-trap-on-error
|
||||
CXXFLAGS += -fsanitize=undefined -fsanitize-undefined-trap-on-error
|
||||
endif
|
||||
CFLAGS += -I. -Iggml/include -Iggml/src -Iggml/src/ggml-cpu -Iinclude -Isrc -I./common -I./vendor -I./vendor/stb -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/ttscpp/include -I./otherarch/ttscpp/src -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only -std=c11 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_CPU -DGGML_USE_CPU_REPACK
|
||||
CXXFLAGS += -I. -Iggml/include -Iggml/src -Iggml/src/ggml-cpu -Iinclude -Isrc -I./common -I./vendor -I./vendor/stb -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/ttscpp/include -I./otherarch/ttscpp/src -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only -std=c++17 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_CPU -DGGML_USE_CPU_REPACK
|
||||
CFLAGS += -I. -Iggml/include -Iggml/src -Iggml/src/ggml-cpu -Iinclude -Isrc -I./common -I./vendor -I./vendor/stb -I./include -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/ttscpp/include -I./otherarch/ttscpp/src -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only -std=c11 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_CPU -DGGML_USE_CPU_REPACK
|
||||
CXXFLAGS += -I. -Iggml/include -Iggml/src -Iggml/src/ggml-cpu -Iinclude -Isrc -I./common -I./vendor -I./vendor/stb -I./include -I./otherarch -I./otherarch/tools -I./otherarch/sdcpp -I./otherarch/ttscpp/include -I./otherarch/ttscpp/src -I./otherarch/sdcpp/thirdparty -I./include/vulkan -O3 -fno-finite-math-only -std=c++17 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -DGGML_USE_CPU -DGGML_USE_CPU_REPACK
|
||||
|
||||
ifndef KCPP_DEBUG
|
||||
CFLAGS += -DNDEBUG -s
|
||||
|
|
|
|||
BIN
OpenCL.dll
BIN
OpenCL.dll
Binary file not shown.
BIN
clblast.dll
BIN
clblast.dll
Binary file not shown.
|
|
@ -701,14 +701,7 @@ extern "C" {
|
|||
|
||||
void * extra; // extra things e.g. for ggml-cuda.cu
|
||||
|
||||
union {
|
||||
char padding[8];
|
||||
union {
|
||||
char trimmed_pad_1[3];
|
||||
char clblast_offload_gpu; //we sneak the flag for gpu offloading for clblast into the padding
|
||||
char trimmed_pad_2[4];
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
static const size_t GGML_TENSOR_SIZE = sizeof(struct ggml_tensor);
|
||||
|
|
|
|||
|
|
@ -76,9 +76,6 @@
|
|||
// precomputed f32 table for f16 (256 KB) (simd-mappings.h)
|
||||
float ggml_table_f32_f16[1 << 16];
|
||||
|
||||
#if defined(GGML_USE_CLBLAST) // allow usage of CLBlast alongside Accelerate functions
|
||||
#include "ggml_v3b-opencl.h"
|
||||
#endif
|
||||
#if defined(__ARM_ARCH)
|
||||
struct ggml_arm_arch_features_type {
|
||||
int sve_cnt;
|
||||
|
|
@ -1263,15 +1260,6 @@ void ggml_compute_forward_mul_mat(
|
|||
// compute by src0 rows
|
||||
|
||||
// TODO: extract to "extra_op"
|
||||
#if defined(GGML_USE_CLBLAST)
|
||||
if (ggml_cl_can_mul_mat(src0, src1, dst)) {
|
||||
if (params->ith == 0) {
|
||||
ggml_cl_mul_mat(src0, src1, dst, params->wdata, params->wsize);
|
||||
}
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if GGML_USE_LLAMAFILE
|
||||
// broadcast factors
|
||||
const int64_t r2 = ne12 / ne02;
|
||||
|
|
@ -3628,11 +3616,6 @@ struct ggml_cplan ggml_graph_plan(
|
|||
{
|
||||
const enum ggml_type vec_dot_type = type_traits_cpu[node->src[0]->type].vec_dot_type;
|
||||
|
||||
#if defined(GGML_USE_CLBLAST)
|
||||
if (ggml_cl_can_mul_mat(node->src[0], node->src[1], node)) {
|
||||
cur = ggml_cl_mul_mat_get_wsize(node->src[0], node->src[1], node);
|
||||
} else
|
||||
#endif
|
||||
if (node->src[1]->type != vec_dot_type) {
|
||||
size_t cur2 = ggml_row_size(vec_dot_type, ggml_nelements(node->src[1]));
|
||||
cur = MAX(cur, cur2);
|
||||
|
|
@ -4561,9 +4544,6 @@ void ggml_cpu_init(void) {
|
|||
#if defined(__ARM_ARCH)
|
||||
ggml_init_arm_arch_features();
|
||||
#endif
|
||||
#if defined(GGML_USE_CLBLAST)
|
||||
ggml_cl_init();
|
||||
#endif
|
||||
|
||||
#if defined(__riscv)
|
||||
ggml_init_riscv_arch_features();
|
||||
|
|
|
|||
|
|
@ -2324,9 +2324,6 @@ static const ggml::cpu::tensor_traits * ggml_repack_get_optimal_repack_type(cons
|
|||
static const ggml::cpu::repack::tensor_traits<block_iq4_nl, 4, 4, GGML_TYPE_Q8_0> iq4_nl_4x4_q8_0;
|
||||
|
||||
bool permit_repack = true;
|
||||
#if defined(GGML_USE_CLBLAST)
|
||||
permit_repack = false; //kcpp: clblast cannot handle repacking
|
||||
#endif
|
||||
|
||||
// instance for Q8_0
|
||||
static const ggml::cpu::repack::tensor_traits<block_q8_0, 4, 4, GGML_TYPE_Q8_0> q8_0_4x4_q8_0;
|
||||
|
|
|
|||
|
|
@ -158,7 +158,7 @@ static int savestate_limit = 0;
|
|||
static std::vector<savestate_data> savestates;
|
||||
|
||||
inline int kcpp_cpu_has_blas(void) {
|
||||
#if defined(GGML_USE_BLAS) || defined(GGML_USE_CUDA) || defined(GGML_USE_VULKAN) || defined(GGML_USE_CLBLAST) || defined(GGML_USE_SYCL)
|
||||
#if defined(GGML_USE_BLAS) || defined(GGML_USE_CUDA) || defined(GGML_USE_VULKAN) || defined(GGML_USE_SYCL)
|
||||
return 1;
|
||||
#else
|
||||
return 0;
|
||||
|
|
@ -2389,21 +2389,6 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
|
|||
}
|
||||
}
|
||||
|
||||
#if defined(GGML_USE_CLBLAST)
|
||||
if(file_format==FileFormat::GGUF_GENERIC && model_params.n_gpu_layers>0)
|
||||
{
|
||||
if(file_format_meta.model_architecture == GGUFArch::ARCH_FALCON)
|
||||
{
|
||||
printf("\nOpenCL does not support GPU Layer offloading for this model architecture! GPU Offload has been disabled.\n");
|
||||
model_params.n_gpu_layers = 0;
|
||||
}
|
||||
else if(file_format_meta.n_expert_count>1)
|
||||
{
|
||||
printf("\nOpenCL cannot use regular GPU offloading for this model architecture. A fallback GPU offloader will be used with degraded performance.\n");
|
||||
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(GGML_USE_CUDA)
|
||||
if(kcpp_parseinfo_maindevice>0)
|
||||
{
|
||||
|
|
@ -3278,7 +3263,7 @@ int GetThreadsToUse(bool blasmode)
|
|||
{
|
||||
if (blasmode)
|
||||
{
|
||||
#if defined(GGML_USE_CLBLAST) || defined(GGML_USE_CUDA) || defined(GGML_USE_VULKAN)
|
||||
#if defined(GGML_USE_CUDA) || defined(GGML_USE_VULKAN)
|
||||
return kcpp_data->n_blasthreads;
|
||||
#else
|
||||
return std::min(kcpp_data->n_blasthreads, 4);
|
||||
|
|
|
|||
|
|
@ -1,50 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
// OpenCL Utils includes
|
||||
#include "OpenCLUtils_Export.h"
|
||||
|
||||
// OpenCL includes
|
||||
#include <CL/cl.h>
|
||||
|
||||
// STL includes
|
||||
#include <time.h>
|
||||
|
||||
UTILS_EXPORT
|
||||
cl_context cl_util_get_context(const cl_uint plat_id, const cl_uint dev_id,
|
||||
const cl_device_type type, cl_int* const error);
|
||||
UTILS_EXPORT
|
||||
cl_device_id cl_util_get_device(const cl_uint plat_id, const cl_uint dev_id,
|
||||
const cl_device_type type, cl_int* const error);
|
||||
|
||||
UTILS_EXPORT
|
||||
cl_int cl_util_print_device_info(const cl_device_id device);
|
||||
|
||||
UTILS_EXPORT
|
||||
char* cl_util_get_device_info(const cl_device_id device,
|
||||
const cl_device_info info, cl_int* const error);
|
||||
UTILS_EXPORT
|
||||
char* cl_util_get_platform_info(const cl_platform_id platform,
|
||||
const cl_platform_info info,
|
||||
cl_int* const error);
|
||||
|
||||
// build program and show log if build is not successful
|
||||
UTILS_EXPORT
|
||||
cl_int cl_util_build_program(const cl_program pr, const cl_device_id dev,
|
||||
const char* const opt);
|
||||
|
||||
#define GET_CURRENT_TIMER(time) \
|
||||
struct timespec time; \
|
||||
timespec_get(&time, TIME_UTC); \
|
||||
{ \
|
||||
}
|
||||
|
||||
#define TIMER_DIFFERENCE(dt, time1, time2) \
|
||||
{ \
|
||||
dt = (time2.tv_sec - time1.tv_sec) * 1000000000 \
|
||||
+ (time2.tv_nsec - time1.tv_nsec); \
|
||||
}
|
||||
|
||||
#define START_TIMER GET_CURRENT_TIMER(start_timer1)
|
||||
#define STOP_TIMER(dt) \
|
||||
GET_CURRENT_TIMER(stop_timer2) \
|
||||
TIMER_DIFFERENCE(dt, start_timer1, stop_timer2)
|
||||
|
|
@ -1,17 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
// OpenCL SDK includes
|
||||
#include "OpenCLUtilsCpp_Export.h"
|
||||
|
||||
#include <CL/Utils/Error.hpp>
|
||||
|
||||
// OpenCL includes
|
||||
#include <CL/opencl.hpp>
|
||||
|
||||
namespace cl {
|
||||
namespace util {
|
||||
Context UTILSCPP_EXPORT get_context(cl_uint plat_id, cl_uint dev_id,
|
||||
cl_device_type type,
|
||||
cl_int* error = nullptr);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,84 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
// STL includes
|
||||
#include <stddef.h>
|
||||
#include <utility> // std::forward, std::integer_sequence
|
||||
#include <tuple> // std::tuple, std::get
|
||||
#include <initializer_list> // std::initializer_list
|
||||
|
||||
namespace cl {
|
||||
namespace util {
|
||||
namespace detail {
|
||||
// Borrowed from:
|
||||
// https://www.fluentcpp.com/2019/03/05/for_each_arg-applying-a-function-to-each-argument-of-a-function-in-cpp/
|
||||
template <class F, class... Args> F for_each_arg(F f, Args&&... args)
|
||||
{
|
||||
(void)std::initializer_list<int>{ (
|
||||
(void)f(std::forward<Args>(args)), 0)... };
|
||||
return f;
|
||||
}
|
||||
|
||||
namespace impl {
|
||||
// Borrowed from: https://stackoverflow.com/a/16387374/1476661
|
||||
template <typename T, typename F, int... Is>
|
||||
void for_each_in_tuple(T&& t, F&& f,
|
||||
std::integer_sequence<int, Is...>)
|
||||
{
|
||||
auto l = {
|
||||
(std::forward<F>(f)(std::get<Is>(std::forward<T>(t))), 0)...
|
||||
};
|
||||
(void)l;
|
||||
}
|
||||
}
|
||||
template <typename... Ts, typename F>
|
||||
void for_each_in_tuple(std::tuple<Ts...> const& t, F&& f)
|
||||
{
|
||||
impl::for_each_in_tuple(
|
||||
t, std::forward<F>(f),
|
||||
std::make_integer_sequence<int, sizeof...(Ts)>());
|
||||
}
|
||||
|
||||
namespace impl {
|
||||
// Borrowed from
|
||||
// https://codereview.stackexchange.com/questions/193420/apply-a-function-to-each-element-of-a-tuple-map-a-tuple
|
||||
template <class F, typename Tuple, std::size_t... Is>
|
||||
auto transform_tuple(Tuple&& t, F&& f, std::index_sequence<Is...>)
|
||||
{
|
||||
return std::make_tuple(std::forward<F>(f)(std::get<Is>(t))...);
|
||||
}
|
||||
}
|
||||
template <class F, typename... Args>
|
||||
auto transform_tuple(const std::tuple<Args...>& t, F&& f)
|
||||
{
|
||||
return impl::transform_tuple(
|
||||
t, std::forward<F>(f),
|
||||
std::make_index_sequence<sizeof...(Args)>{});
|
||||
}
|
||||
|
||||
namespace impl {
|
||||
// Borrowed from
|
||||
// http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2013/n3658.html
|
||||
// with modifications of Casey Carter at
|
||||
// https://stackoverflow.com/a/51365112/1476661
|
||||
template <typename F, typename Tuple, std::size_t... I>
|
||||
auto apply(F&& f, Tuple&& args, std::index_sequence<I...>)
|
||||
-> decltype(std::forward<F>(f)(
|
||||
std::get<I>(std::forward<Tuple>(args))...))
|
||||
{
|
||||
return std::forward<F>(f)(
|
||||
std::get<I>(std::forward<Tuple>(args))...);
|
||||
}
|
||||
}
|
||||
template <typename F, typename Tuple,
|
||||
typename Indices = std::make_index_sequence<
|
||||
std::tuple_size<std::remove_reference_t<Tuple>>::value>>
|
||||
auto apply(F&& f, Tuple&& args)
|
||||
-> decltype(impl::apply(std::forward<F>(f),
|
||||
std::forward<Tuple>(args), Indices()))
|
||||
{
|
||||
return impl::apply(std::forward<F>(f), std::forward<Tuple>(args),
|
||||
Indices());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,21 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#include "OpenCLUtilsCpp_Export.h"
|
||||
#include <CL/Utils/Error.hpp>
|
||||
|
||||
#include <CL/opencl.hpp>
|
||||
|
||||
namespace cl {
|
||||
namespace util {
|
||||
bool UTILSCPP_EXPORT opencl_c_version_contains(
|
||||
const cl::Device& device, const cl::string& version_fragment);
|
||||
|
||||
bool UTILSCPP_EXPORT supports_extension(const cl::Device& device,
|
||||
const cl::string& extension);
|
||||
|
||||
#ifdef CL_VERSION_3_0
|
||||
bool UTILSCPP_EXPORT supports_feature(const cl::Device& device,
|
||||
const cl::string& feature_name);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
|
@ -1,88 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
// OpenCL Utils includes
|
||||
#include "OpenCLUtils_Export.h"
|
||||
|
||||
// OpenCL Utils includes
|
||||
#include <CL/Utils/ErrorCodes.h>
|
||||
|
||||
// STL includes
|
||||
#include <stdio.h> // fprintf
|
||||
|
||||
// OpenCL includes
|
||||
#include <CL/cl.h>
|
||||
|
||||
// RET = function returns error code
|
||||
// PAR = functions sets error code in the paremeter
|
||||
|
||||
#ifdef _DEBUG
|
||||
|
||||
#define OCLERROR_RET(func, err, label) \
|
||||
do \
|
||||
{ \
|
||||
err = func; \
|
||||
if (err != CL_SUCCESS) \
|
||||
{ \
|
||||
cl_util_print_error(err); \
|
||||
fprintf(stderr, "on line %d, in file %s\n%s\n", __LINE__, \
|
||||
__FILE__, #func); \
|
||||
goto label; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define OCLERROR_PAR(func, err, label) \
|
||||
do \
|
||||
{ \
|
||||
func; \
|
||||
if (err != CL_SUCCESS) \
|
||||
{ \
|
||||
cl_util_print_error(err); \
|
||||
fprintf(stderr, "on line %d, in file %s\n%s\n", __LINE__, \
|
||||
__FILE__, #func); \
|
||||
goto label; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define MEM_CHECK(func, err, label) \
|
||||
do \
|
||||
{ \
|
||||
if ((func) == NULL) \
|
||||
{ \
|
||||
err = CL_OUT_OF_HOST_MEMORY; \
|
||||
cl_util_print_error(err); \
|
||||
fprintf(stderr, "on line %d, in file %s\n%s\n", __LINE__, \
|
||||
__FILE__, #func); \
|
||||
goto label; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#else
|
||||
|
||||
#define OCLERROR_RET(func, err, label) \
|
||||
do \
|
||||
{ \
|
||||
err = func; \
|
||||
if (err != CL_SUCCESS) goto label; \
|
||||
} while (0)
|
||||
|
||||
#define OCLERROR_PAR(func, err, label) \
|
||||
do \
|
||||
{ \
|
||||
func; \
|
||||
if (err != CL_SUCCESS) goto label; \
|
||||
} while (0)
|
||||
|
||||
#define MEM_CHECK(func, err, label) \
|
||||
do \
|
||||
{ \
|
||||
if ((func) == NULL) \
|
||||
{ \
|
||||
err = CL_OUT_OF_HOST_MEMORY; \
|
||||
goto label; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#endif
|
||||
|
||||
UTILS_EXPORT
|
||||
void cl_util_print_error(cl_int error);
|
||||
|
|
@ -1,70 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
// OpenCL Utils includes
|
||||
#include "OpenCLUtilsCpp_Export.h"
|
||||
|
||||
// OpenCL Utils includes
|
||||
#include <CL/Utils/ErrorCodes.h>
|
||||
|
||||
// OpenCL includes
|
||||
#include <CL/opencl.hpp>
|
||||
|
||||
namespace cl {
|
||||
namespace util {
|
||||
#if defined(CL_HPP_ENABLE_EXCEPTIONS)
|
||||
/*! \brief Exception class
|
||||
*
|
||||
* This may be thrown by SDK utility functions when
|
||||
* CL_HPP_ENABLE_EXCEPTIONS is defined.
|
||||
*/
|
||||
class Error : public std::exception {
|
||||
private:
|
||||
int err_;
|
||||
const char* errStr_;
|
||||
|
||||
public:
|
||||
/*! \brief Create a new SDK error exception for a given error code
|
||||
* and corresponding message.
|
||||
*
|
||||
* \param err error code value.
|
||||
*
|
||||
* \param errStr a descriptive string that must remain in scope until
|
||||
* handling of the exception has concluded. If set, it
|
||||
* will be returned by what().
|
||||
*/
|
||||
Error(cl_int err, const char* errStr = NULL): err_(err), errStr_(errStr)
|
||||
{}
|
||||
|
||||
~Error() throw() {}
|
||||
|
||||
/*! \brief Get error string associated with exception
|
||||
*
|
||||
* \return A memory pointer to the error message string.
|
||||
*/
|
||||
virtual const char* what() const throw()
|
||||
{
|
||||
if (errStr_ == NULL)
|
||||
{
|
||||
return "empty";
|
||||
}
|
||||
else
|
||||
{
|
||||
return errStr_;
|
||||
}
|
||||
}
|
||||
|
||||
/*! \brief Get error code associated with exception
|
||||
*
|
||||
* \return The error code.
|
||||
*/
|
||||
cl_int err(void) const { return err_; }
|
||||
};
|
||||
#endif
|
||||
|
||||
namespace detail {
|
||||
UTILSCPP_EXPORT cl_int errHandler(cl_int err, cl_int* errPtr,
|
||||
const char* errStr = nullptr);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
|
@ -1,5 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#define CL_UTIL_INDEX_OUT_OF_RANGE -2000
|
||||
#define CL_UTIL_DEVICE_NOT_INTEROPERABLE -2001
|
||||
#define CL_UTIL_FILE_OPERATION_ERROR -2002
|
||||
|
|
@ -1,13 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
// OpenCL Utils includes
|
||||
#include "OpenCLUtils_Export.h"
|
||||
|
||||
// OpenCL includes
|
||||
#include <CL/cl.h>
|
||||
|
||||
UTILS_EXPORT
|
||||
cl_ulong cl_util_get_event_duration(const cl_event event,
|
||||
const cl_profiling_info start,
|
||||
const cl_profiling_info end,
|
||||
cl_int* const error);
|
||||
|
|
@ -1,21 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
// OpenCL SDK includes
|
||||
#include "OpenCLUtilsCpp_Export.h"
|
||||
|
||||
// STL includes
|
||||
#include <chrono>
|
||||
|
||||
// OpenCL includes
|
||||
#include <CL/opencl.hpp>
|
||||
|
||||
namespace cl {
|
||||
namespace util {
|
||||
template <cl_int From, cl_int To, typename Dur = std::chrono::nanoseconds>
|
||||
auto get_duration(cl::Event& ev)
|
||||
{
|
||||
return std::chrono::duration_cast<Dur>(std::chrono::nanoseconds{
|
||||
ev.getProfilingInfo<To>() - ev.getProfilingInfo<From>() });
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,42 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
// OpenCL Utils includes
|
||||
#include "OpenCLUtils_Export.h"
|
||||
|
||||
// OpenCL includes
|
||||
#include <CL/cl.h>
|
||||
|
||||
// read all the text file contents securely in ANSI C89
|
||||
// return pointer to C-string with file contents
|
||||
// can handle streams with no known size and no support for fseek
|
||||
// based on https://stackoverflow.com/questions/14002954/ by Nominal Animal
|
||||
UTILS_EXPORT
|
||||
char* cl_util_read_text_file(const char* const filename, size_t* const length,
|
||||
cl_int* const error);
|
||||
|
||||
// read all the binary file contents securely in ANSI C89
|
||||
// return pointer to file contents
|
||||
// can handle streams with no known size and no support for fseek
|
||||
// based on https://stackoverflow.com/questions/14002954/ by Nominal Animal
|
||||
UTILS_EXPORT
|
||||
unsigned char* cl_util_read_binary_file(const char* const filename,
|
||||
size_t* const length,
|
||||
cl_int* const error);
|
||||
|
||||
// write binaries of OpenCL compiled program
|
||||
// binaries are written as separate files for each device
|
||||
// with file name "(program_file_name)_(name of device).bin"
|
||||
// based on variant of Logan
|
||||
// http://logan.tw/posts/2014/11/22/pre-compile-the-opencl-kernel-program-part-2/
|
||||
UTILS_EXPORT
|
||||
cl_int cl_util_write_binaries(const cl_program program,
|
||||
const char* const program_file_name);
|
||||
|
||||
// read binaries of OpenCL compiled program
|
||||
// from files of file names "(program_file_name)_(name of device).bin"
|
||||
UTILS_EXPORT
|
||||
cl_program cl_util_read_binaries(const cl_context context,
|
||||
const cl_device_id* const devices,
|
||||
const cl_uint num_devices,
|
||||
const char* const program_file_name,
|
||||
cl_int* const error);
|
||||
|
|
@ -1,49 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
// OpenCL SDK includes
|
||||
#include <CL/Utils/Utils.hpp>
|
||||
|
||||
// STL includes
|
||||
#include <fstream>
|
||||
#include <string>
|
||||
|
||||
namespace cl {
|
||||
namespace util {
|
||||
// Scott Meyers, Effective STL, Addison-Wesley Professional, 2001, Item 29
|
||||
// with error handling
|
||||
UTILSCPP_EXPORT
|
||||
std::string read_text_file(const char* const filename, cl_int* const error)
|
||||
{
|
||||
std::ifstream in(filename);
|
||||
if (in.good())
|
||||
{
|
||||
try
|
||||
{
|
||||
std::string red((std::istreambuf_iterator<char>(in)),
|
||||
std::istreambuf_iterator<char>());
|
||||
if (in.good() && in.eof())
|
||||
{
|
||||
if (error != nullptr) *error = CL_SUCCESS;
|
||||
return red;
|
||||
}
|
||||
else
|
||||
{
|
||||
detail::errHandler(CL_UTIL_FILE_OPERATION_ERROR, error,
|
||||
"File read error!");
|
||||
return std::string();
|
||||
}
|
||||
} catch (std::bad_alloc& ex)
|
||||
{
|
||||
detail::errHandler(CL_OUT_OF_RESOURCES, error,
|
||||
"Bad allocation!");
|
||||
return std::string();
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
detail::errHandler(CL_INVALID_VALUE, error, "No file!");
|
||||
return std::string();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,18 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#include "OpenCLUtilsCpp_Export.h"
|
||||
#include <CL/Utils/Error.hpp>
|
||||
|
||||
#include <CL/opencl.hpp>
|
||||
|
||||
namespace cl {
|
||||
namespace util {
|
||||
vector<cl_context_properties>
|
||||
UTILSCPP_EXPORT get_interop_context_properties(const cl::Device& plat,
|
||||
cl_int* error = nullptr);
|
||||
|
||||
Context UTILSCPP_EXPORT get_interop_context(int plat_id, int dev_id,
|
||||
cl_device_type type,
|
||||
cl_int* error = nullptr);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,42 +0,0 @@
|
|||
|
||||
#ifndef UTILSCPP_EXPORT_H
|
||||
#define UTILSCPP_EXPORT_H
|
||||
|
||||
#ifdef OPENCLUTILSCPP_STATIC_DEFINE
|
||||
# define UTILSCPP_EXPORT
|
||||
# define OPENCLUTILSCPP_NO_EXPORT
|
||||
#else
|
||||
# ifndef UTILSCPP_EXPORT
|
||||
# ifdef OpenCLUtilsCpp_EXPORTS
|
||||
/* We are building this library */
|
||||
# define UTILSCPP_EXPORT
|
||||
# else
|
||||
/* We are using this library */
|
||||
# define UTILSCPP_EXPORT
|
||||
# endif
|
||||
# endif
|
||||
|
||||
# ifndef OPENCLUTILSCPP_NO_EXPORT
|
||||
# define OPENCLUTILSCPP_NO_EXPORT
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifndef OPENCLUTILSCPP_DEPRECATED
|
||||
# define OPENCLUTILSCPP_DEPRECATED __declspec(deprecated)
|
||||
#endif
|
||||
|
||||
#ifndef OPENCLUTILSCPP_DEPRECATED_EXPORT
|
||||
# define OPENCLUTILSCPP_DEPRECATED_EXPORT UTILSCPP_EXPORT OPENCLUTILSCPP_DEPRECATED
|
||||
#endif
|
||||
|
||||
#ifndef OPENCLUTILSCPP_DEPRECATED_NO_EXPORT
|
||||
# define OPENCLUTILSCPP_DEPRECATED_NO_EXPORT OPENCLUTILSCPP_NO_EXPORT OPENCLUTILSCPP_DEPRECATED
|
||||
#endif
|
||||
|
||||
#if 0 /* DEFINE_NO_DEPRECATED */
|
||||
# ifndef OPENCLUTILSCPP_NO_DEPRECATED
|
||||
# define OPENCLUTILSCPP_NO_DEPRECATED
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#endif /* UTILSCPP_EXPORT_H */
|
||||
|
|
@ -1,42 +0,0 @@
|
|||
|
||||
#ifndef UTILS_EXPORT_H
|
||||
#define UTILS_EXPORT_H
|
||||
|
||||
#ifdef OPENCLUTILS_STATIC_DEFINE
|
||||
# define UTILS_EXPORT
|
||||
# define OPENCLUTILS_NO_EXPORT
|
||||
#else
|
||||
# ifndef UTILS_EXPORT
|
||||
# ifdef OpenCLUtils_EXPORTS
|
||||
/* We are building this library */
|
||||
# define UTILS_EXPORT
|
||||
# else
|
||||
/* We are using this library */
|
||||
# define UTILS_EXPORT
|
||||
# endif
|
||||
# endif
|
||||
|
||||
# ifndef OPENCLUTILS_NO_EXPORT
|
||||
# define OPENCLUTILS_NO_EXPORT
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifndef OPENCLUTILS_DEPRECATED
|
||||
# define OPENCLUTILS_DEPRECATED __declspec(deprecated)
|
||||
#endif
|
||||
|
||||
#ifndef OPENCLUTILS_DEPRECATED_EXPORT
|
||||
# define OPENCLUTILS_DEPRECATED_EXPORT UTILS_EXPORT OPENCLUTILS_DEPRECATED
|
||||
#endif
|
||||
|
||||
#ifndef OPENCLUTILS_DEPRECATED_NO_EXPORT
|
||||
# define OPENCLUTILS_DEPRECATED_NO_EXPORT OPENCLUTILS_NO_EXPORT OPENCLUTILS_DEPRECATED
|
||||
#endif
|
||||
|
||||
#if 0 /* DEFINE_NO_DEPRECATED */
|
||||
# ifndef OPENCLUTILS_NO_DEPRECATED
|
||||
# define OPENCLUTILS_NO_DEPRECATED
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#endif /* UTILS_EXPORT_H */
|
||||
|
|
@ -1,16 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#include "OpenCLUtilsCpp_Export.h"
|
||||
#include <CL/Utils/Error.hpp>
|
||||
|
||||
#include <CL/opencl.hpp>
|
||||
|
||||
namespace cl {
|
||||
namespace util {
|
||||
bool UTILSCPP_EXPORT supports_extension(const cl::Platform& platform,
|
||||
const cl::string& extension);
|
||||
|
||||
bool UTILSCPP_EXPORT platform_version_contains(
|
||||
const cl::Platform& platform, const cl::string& version_fragment);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,11 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
// OpenCL Utils includes
|
||||
#include "OpenCLUtils_Export.h"
|
||||
|
||||
#include <CL/Utils/Error.h>
|
||||
#include <CL/Utils/File.h>
|
||||
#include <CL/Utils/Context.h>
|
||||
|
||||
// OpenCL includes
|
||||
#include <CL/cl.h>
|
||||
|
|
@ -1,14 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
// OpenCL Utils includes
|
||||
#include "OpenCLUtils_Export.h"
|
||||
|
||||
#include <CL/Utils/Detail.hpp>
|
||||
#include <CL/Utils/Error.hpp>
|
||||
#include <CL/Utils/Platform.hpp>
|
||||
#include <CL/Utils/Device.hpp>
|
||||
#include <CL/Utils/Context.hpp>
|
||||
#include <CL/Utils/Event.hpp>
|
||||
|
||||
// OpenCL includes
|
||||
#include <CL/opencl.hpp>
|
||||
1936
include/CL/cl.h
1936
include/CL/cl.h
File diff suppressed because it is too large
Load diff
|
|
@ -1,18 +0,0 @@
|
|||
//
|
||||
// Copyright (c) 2020 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
|
||||
#include <CL/opencl.hpp>
|
||||
#pragma message("cl2.hpp has been renamed to opencl.hpp to make it clear that it supports all versions of OpenCL. Please include opencl.hpp directly.")
|
||||
|
|
@ -1,154 +0,0 @@
|
|||
/*******************************************************************************
|
||||
* Copyright (c) 2008-2020 The Khronos Group Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
******************************************************************************/
|
||||
|
||||
#ifndef __OPENCL_CL_D3D10_H
|
||||
#define __OPENCL_CL_D3D10_H
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#if _MSC_VER >=1500
|
||||
#pragma warning( push )
|
||||
#pragma warning( disable : 4201 )
|
||||
#pragma warning( disable : 5105 )
|
||||
#endif
|
||||
#endif
|
||||
#include <d3d10.h>
|
||||
#if defined(_MSC_VER)
|
||||
#if _MSC_VER >=1500
|
||||
#pragma warning( pop )
|
||||
#endif
|
||||
#endif
|
||||
#include <CL/cl.h>
|
||||
#include <CL/cl_platform.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/******************************************************************************
|
||||
* cl_khr_d3d10_sharing */
|
||||
#define cl_khr_d3d10_sharing 1
|
||||
|
||||
typedef cl_uint cl_d3d10_device_source_khr;
|
||||
typedef cl_uint cl_d3d10_device_set_khr;
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
/* Error Codes */
|
||||
#define CL_INVALID_D3D10_DEVICE_KHR -1002
|
||||
#define CL_INVALID_D3D10_RESOURCE_KHR -1003
|
||||
#define CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR -1004
|
||||
#define CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR -1005
|
||||
|
||||
/* cl_d3d10_device_source_nv */
|
||||
#define CL_D3D10_DEVICE_KHR 0x4010
|
||||
#define CL_D3D10_DXGI_ADAPTER_KHR 0x4011
|
||||
|
||||
/* cl_d3d10_device_set_nv */
|
||||
#define CL_PREFERRED_DEVICES_FOR_D3D10_KHR 0x4012
|
||||
#define CL_ALL_DEVICES_FOR_D3D10_KHR 0x4013
|
||||
|
||||
/* cl_context_info */
|
||||
#define CL_CONTEXT_D3D10_DEVICE_KHR 0x4014
|
||||
#define CL_CONTEXT_D3D10_PREFER_SHARED_RESOURCES_KHR 0x402C
|
||||
|
||||
/* cl_mem_info */
|
||||
#define CL_MEM_D3D10_RESOURCE_KHR 0x4015
|
||||
|
||||
/* cl_image_info */
|
||||
#define CL_IMAGE_D3D10_SUBRESOURCE_KHR 0x4016
|
||||
|
||||
/* cl_command_type */
|
||||
#define CL_COMMAND_ACQUIRE_D3D10_OBJECTS_KHR 0x4017
|
||||
#define CL_COMMAND_RELEASE_D3D10_OBJECTS_KHR 0x4018
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
typedef cl_int (CL_API_CALL *clGetDeviceIDsFromD3D10KHR_fn)(
|
||||
cl_platform_id platform,
|
||||
cl_d3d10_device_source_khr d3d_device_source,
|
||||
void * d3d_object,
|
||||
cl_d3d10_device_set_khr d3d_device_set,
|
||||
cl_uint num_entries,
|
||||
cl_device_id * devices,
|
||||
cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_mem (CL_API_CALL *clCreateFromD3D10BufferKHR_fn)(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
ID3D10Buffer * resource,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_mem (CL_API_CALL *clCreateFromD3D10Texture2DKHR_fn)(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
ID3D10Texture2D * resource,
|
||||
UINT subresource,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_mem (CL_API_CALL *clCreateFromD3D10Texture3DKHR_fn)(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
ID3D10Texture3D * resource,
|
||||
UINT subresource,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_int (CL_API_CALL *clEnqueueAcquireD3D10ObjectsKHR_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_int (CL_API_CALL *clEnqueueReleaseD3D10ObjectsKHR_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
/***************************************************************
|
||||
* cl_intel_sharing_format_query_d3d10
|
||||
***************************************************************/
|
||||
#define cl_intel_sharing_format_query_d3d10 1
|
||||
|
||||
/* when cl_khr_d3d10_sharing is supported */
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetSupportedD3D10TextureFormatsINTEL(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
cl_mem_object_type image_type,
|
||||
cl_uint num_entries,
|
||||
DXGI_FORMAT* d3d10_formats,
|
||||
cl_uint* num_texture_formats) ;
|
||||
|
||||
typedef cl_int (CL_API_CALL *
|
||||
clGetSupportedD3D10TextureFormatsINTEL_fn)(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
cl_mem_object_type image_type,
|
||||
cl_uint num_entries,
|
||||
DXGI_FORMAT* d3d10_formats,
|
||||
cl_uint* num_texture_formats) ;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __OPENCL_CL_D3D10_H */
|
||||
|
||||
|
|
@ -1,156 +0,0 @@
|
|||
/*******************************************************************************
|
||||
* Copyright (c) 2008-2020 The Khronos Group Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
******************************************************************************/
|
||||
|
||||
#ifndef __OPENCL_CL_D3D11_H
|
||||
#define __OPENCL_CL_D3D11_H
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#if _MSC_VER >=1500
|
||||
#pragma warning( push )
|
||||
#pragma warning( disable : 4201 )
|
||||
#pragma warning( disable : 5105 )
|
||||
#endif
|
||||
#endif
|
||||
#include <d3d11.h>
|
||||
#if defined(_MSC_VER)
|
||||
#if _MSC_VER >=1500
|
||||
#pragma warning( pop )
|
||||
#endif
|
||||
#endif
|
||||
#include <CL/cl.h>
|
||||
#include <CL/cl_platform.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/******************************************************************************
|
||||
* cl_khr_d3d11_sharing */
|
||||
#define cl_khr_d3d11_sharing 1
|
||||
|
||||
typedef cl_uint cl_d3d11_device_source_khr;
|
||||
typedef cl_uint cl_d3d11_device_set_khr;
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
/* Error Codes */
|
||||
#define CL_INVALID_D3D11_DEVICE_KHR -1006
|
||||
#define CL_INVALID_D3D11_RESOURCE_KHR -1007
|
||||
#define CL_D3D11_RESOURCE_ALREADY_ACQUIRED_KHR -1008
|
||||
#define CL_D3D11_RESOURCE_NOT_ACQUIRED_KHR -1009
|
||||
|
||||
/* cl_d3d11_device_source */
|
||||
#define CL_D3D11_DEVICE_KHR 0x4019
|
||||
#define CL_D3D11_DXGI_ADAPTER_KHR 0x401A
|
||||
|
||||
/* cl_d3d11_device_set */
|
||||
#define CL_PREFERRED_DEVICES_FOR_D3D11_KHR 0x401B
|
||||
#define CL_ALL_DEVICES_FOR_D3D11_KHR 0x401C
|
||||
|
||||
/* cl_context_info */
|
||||
#define CL_CONTEXT_D3D11_DEVICE_KHR 0x401D
|
||||
#define CL_CONTEXT_D3D11_PREFER_SHARED_RESOURCES_KHR 0x402D
|
||||
|
||||
/* cl_mem_info */
|
||||
#define CL_MEM_D3D11_RESOURCE_KHR 0x401E
|
||||
|
||||
/* cl_image_info */
|
||||
#define CL_IMAGE_D3D11_SUBRESOURCE_KHR 0x401F
|
||||
|
||||
/* cl_command_type */
|
||||
#define CL_COMMAND_ACQUIRE_D3D11_OBJECTS_KHR 0x4020
|
||||
#define CL_COMMAND_RELEASE_D3D11_OBJECTS_KHR 0x4021
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
typedef cl_int (CL_API_CALL *clGetDeviceIDsFromD3D11KHR_fn)(
|
||||
cl_platform_id platform,
|
||||
cl_d3d11_device_source_khr d3d_device_source,
|
||||
void * d3d_object,
|
||||
cl_d3d11_device_set_khr d3d_device_set,
|
||||
cl_uint num_entries,
|
||||
cl_device_id * devices,
|
||||
cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef cl_mem (CL_API_CALL *clCreateFromD3D11BufferKHR_fn)(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
ID3D11Buffer * resource,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef cl_mem (CL_API_CALL *clCreateFromD3D11Texture2DKHR_fn)(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
ID3D11Texture2D * resource,
|
||||
UINT subresource,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef cl_mem (CL_API_CALL *clCreateFromD3D11Texture3DKHR_fn)(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
ID3D11Texture3D * resource,
|
||||
UINT subresource,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef cl_int (CL_API_CALL *clEnqueueAcquireD3D11ObjectsKHR_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef cl_int (CL_API_CALL *clEnqueueReleaseD3D11ObjectsKHR_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
/***************************************************************
|
||||
* cl_intel_sharing_format_query_d3d11
|
||||
***************************************************************/
|
||||
#define cl_intel_sharing_format_query_d3d11 1
|
||||
|
||||
/* when cl_khr_d3d11_sharing is supported */
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetSupportedD3D11TextureFormatsINTEL(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
cl_mem_object_type image_type,
|
||||
cl_uint plane,
|
||||
cl_uint num_entries,
|
||||
DXGI_FORMAT* d3d11_formats,
|
||||
cl_uint* num_texture_formats) ;
|
||||
|
||||
typedef cl_int (CL_API_CALL *
|
||||
clGetSupportedD3D11TextureFormatsINTEL_fn)(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
cl_mem_object_type image_type,
|
||||
cl_uint plane,
|
||||
cl_uint num_entries,
|
||||
DXGI_FORMAT* d3d11_formats,
|
||||
cl_uint* num_texture_formats) ;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __OPENCL_CL_D3D11_H */
|
||||
|
||||
|
|
@ -1,268 +0,0 @@
|
|||
/*******************************************************************************
|
||||
* Copyright (c) 2008-2020 The Khronos Group Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
******************************************************************************/
|
||||
|
||||
#ifndef __OPENCL_CL_DX9_MEDIA_SHARING_H
|
||||
#define __OPENCL_CL_DX9_MEDIA_SHARING_H
|
||||
|
||||
#include <CL/cl.h>
|
||||
#include <CL/cl_platform.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/******************************************************************************/
|
||||
/* cl_khr_dx9_media_sharing */
|
||||
#define cl_khr_dx9_media_sharing 1
|
||||
|
||||
typedef cl_uint cl_dx9_media_adapter_type_khr;
|
||||
typedef cl_uint cl_dx9_media_adapter_set_khr;
|
||||
|
||||
#if defined(_WIN32)
|
||||
#if defined(_MSC_VER)
|
||||
#if _MSC_VER >=1500
|
||||
#pragma warning( push )
|
||||
#pragma warning( disable : 4201 )
|
||||
#pragma warning( disable : 5105 )
|
||||
#endif
|
||||
#endif
|
||||
#include <d3d9.h>
|
||||
#if defined(_MSC_VER)
|
||||
#if _MSC_VER >=1500
|
||||
#pragma warning( pop )
|
||||
#endif
|
||||
#endif
|
||||
typedef struct _cl_dx9_surface_info_khr
|
||||
{
|
||||
IDirect3DSurface9 *resource;
|
||||
HANDLE shared_handle;
|
||||
} cl_dx9_surface_info_khr;
|
||||
#endif
|
||||
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
/* Error Codes */
|
||||
#define CL_INVALID_DX9_MEDIA_ADAPTER_KHR -1010
|
||||
#define CL_INVALID_DX9_MEDIA_SURFACE_KHR -1011
|
||||
#define CL_DX9_MEDIA_SURFACE_ALREADY_ACQUIRED_KHR -1012
|
||||
#define CL_DX9_MEDIA_SURFACE_NOT_ACQUIRED_KHR -1013
|
||||
|
||||
/* cl_media_adapter_type_khr */
|
||||
#define CL_ADAPTER_D3D9_KHR 0x2020
|
||||
#define CL_ADAPTER_D3D9EX_KHR 0x2021
|
||||
#define CL_ADAPTER_DXVA_KHR 0x2022
|
||||
|
||||
/* cl_media_adapter_set_khr */
|
||||
#define CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR 0x2023
|
||||
#define CL_ALL_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR 0x2024
|
||||
|
||||
/* cl_context_info */
|
||||
#define CL_CONTEXT_ADAPTER_D3D9_KHR 0x2025
|
||||
#define CL_CONTEXT_ADAPTER_D3D9EX_KHR 0x2026
|
||||
#define CL_CONTEXT_ADAPTER_DXVA_KHR 0x2027
|
||||
|
||||
/* cl_mem_info */
|
||||
#define CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR 0x2028
|
||||
#define CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR 0x2029
|
||||
|
||||
/* cl_image_info */
|
||||
#define CL_IMAGE_DX9_MEDIA_PLANE_KHR 0x202A
|
||||
|
||||
/* cl_command_type */
|
||||
#define CL_COMMAND_ACQUIRE_DX9_MEDIA_SURFACES_KHR 0x202B
|
||||
#define CL_COMMAND_RELEASE_DX9_MEDIA_SURFACES_KHR 0x202C
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
typedef cl_int (CL_API_CALL *clGetDeviceIDsFromDX9MediaAdapterKHR_fn)(
|
||||
cl_platform_id platform,
|
||||
cl_uint num_media_adapters,
|
||||
cl_dx9_media_adapter_type_khr * media_adapter_type,
|
||||
void * media_adapters,
|
||||
cl_dx9_media_adapter_set_khr media_adapter_set,
|
||||
cl_uint num_entries,
|
||||
cl_device_id * devices,
|
||||
cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef cl_mem (CL_API_CALL *clCreateFromDX9MediaSurfaceKHR_fn)(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
cl_dx9_media_adapter_type_khr adapter_type,
|
||||
void * surface_info,
|
||||
cl_uint plane,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef cl_int (CL_API_CALL *clEnqueueAcquireDX9MediaSurfacesKHR_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef cl_int (CL_API_CALL *clEnqueueReleaseDX9MediaSurfacesKHR_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
/***************************************
|
||||
* cl_intel_dx9_media_sharing extension *
|
||||
****************************************/
|
||||
|
||||
#define cl_intel_dx9_media_sharing 1
|
||||
|
||||
typedef cl_uint cl_dx9_device_source_intel;
|
||||
typedef cl_uint cl_dx9_device_set_intel;
|
||||
|
||||
/* error codes */
|
||||
#define CL_INVALID_DX9_DEVICE_INTEL -1010
|
||||
#define CL_INVALID_DX9_RESOURCE_INTEL -1011
|
||||
#define CL_DX9_RESOURCE_ALREADY_ACQUIRED_INTEL -1012
|
||||
#define CL_DX9_RESOURCE_NOT_ACQUIRED_INTEL -1013
|
||||
|
||||
/* cl_dx9_device_source_intel */
|
||||
#define CL_D3D9_DEVICE_INTEL 0x4022
|
||||
#define CL_D3D9EX_DEVICE_INTEL 0x4070
|
||||
#define CL_DXVA_DEVICE_INTEL 0x4071
|
||||
|
||||
/* cl_dx9_device_set_intel */
|
||||
#define CL_PREFERRED_DEVICES_FOR_DX9_INTEL 0x4024
|
||||
#define CL_ALL_DEVICES_FOR_DX9_INTEL 0x4025
|
||||
|
||||
/* cl_context_info */
|
||||
#define CL_CONTEXT_D3D9_DEVICE_INTEL 0x4026
|
||||
#define CL_CONTEXT_D3D9EX_DEVICE_INTEL 0x4072
|
||||
#define CL_CONTEXT_DXVA_DEVICE_INTEL 0x4073
|
||||
|
||||
/* cl_mem_info */
|
||||
#define CL_MEM_DX9_RESOURCE_INTEL 0x4027
|
||||
#define CL_MEM_DX9_SHARED_HANDLE_INTEL 0x4074
|
||||
|
||||
/* cl_image_info */
|
||||
#define CL_IMAGE_DX9_PLANE_INTEL 0x4075
|
||||
|
||||
/* cl_command_type */
|
||||
#define CL_COMMAND_ACQUIRE_DX9_OBJECTS_INTEL 0x402A
|
||||
#define CL_COMMAND_RELEASE_DX9_OBJECTS_INTEL 0x402B
|
||||
/******************************************************************************/
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetDeviceIDsFromDX9INTEL(
|
||||
cl_platform_id platform,
|
||||
cl_dx9_device_source_intel dx9_device_source,
|
||||
void* dx9_object,
|
||||
cl_dx9_device_set_intel dx9_device_set,
|
||||
cl_uint num_entries,
|
||||
cl_device_id* devices,
|
||||
cl_uint* num_devices) CL_API_SUFFIX__VERSION_1_1;
|
||||
|
||||
typedef cl_int (CL_API_CALL* clGetDeviceIDsFromDX9INTEL_fn)(
|
||||
cl_platform_id platform,
|
||||
cl_dx9_device_source_intel dx9_device_source,
|
||||
void* dx9_object,
|
||||
cl_dx9_device_set_intel dx9_device_set,
|
||||
cl_uint num_entries,
|
||||
cl_device_id* devices,
|
||||
cl_uint* num_devices) CL_API_SUFFIX__VERSION_1_1;
|
||||
|
||||
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||
clCreateFromDX9MediaSurfaceINTEL(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
IDirect3DSurface9* resource,
|
||||
HANDLE sharedHandle,
|
||||
UINT plane,
|
||||
cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_1;
|
||||
|
||||
typedef cl_mem (CL_API_CALL *clCreateFromDX9MediaSurfaceINTEL_fn)(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
IDirect3DSurface9* resource,
|
||||
HANDLE sharedHandle,
|
||||
UINT plane,
|
||||
cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_1;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueAcquireDX9ObjectsINTEL(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem* mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event* event_wait_list,
|
||||
cl_event* event) CL_API_SUFFIX__VERSION_1_1;
|
||||
|
||||
typedef cl_int (CL_API_CALL *clEnqueueAcquireDX9ObjectsINTEL_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem* mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event* event_wait_list,
|
||||
cl_event* event) CL_API_SUFFIX__VERSION_1_1;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueReleaseDX9ObjectsINTEL(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
cl_mem* mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event* event_wait_list,
|
||||
cl_event* event) CL_API_SUFFIX__VERSION_1_1;
|
||||
|
||||
typedef cl_int (CL_API_CALL *clEnqueueReleaseDX9ObjectsINTEL_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
cl_mem* mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event* event_wait_list,
|
||||
cl_event* event) CL_API_SUFFIX__VERSION_1_1;
|
||||
|
||||
/***************************************************************
|
||||
* cl_intel_sharing_format_query_dx9
|
||||
***************************************************************/
|
||||
#define cl_intel_sharing_format_query_dx9 1
|
||||
|
||||
/* when cl_khr_dx9_media_sharing or cl_intel_dx9_media_sharing is supported */
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetSupportedDX9MediaSurfaceFormatsINTEL(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
cl_mem_object_type image_type,
|
||||
cl_uint plane,
|
||||
cl_uint num_entries,
|
||||
D3DFORMAT* dx9_formats,
|
||||
cl_uint* num_surface_formats) ;
|
||||
|
||||
typedef cl_int (CL_API_CALL *
|
||||
clGetSupportedDX9MediaSurfaceFormatsINTEL_fn)(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
cl_mem_object_type image_type,
|
||||
cl_uint plane,
|
||||
cl_uint num_entries,
|
||||
D3DFORMAT* dx9_formats,
|
||||
cl_uint* num_surface_formats) ;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __OPENCL_CL_DX9_MEDIA_SHARING_H */
|
||||
|
||||
|
|
@ -1,18 +0,0 @@
|
|||
/*******************************************************************************
|
||||
* Copyright (c) 2008-2020 The Khronos Group Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
******************************************************************************/
|
||||
|
||||
#include <CL/cl_dx9_media_sharing.h>
|
||||
#pragma message("The Intel DX9 media sharing extensions have been moved into cl_dx9_media_sharing.h. Please include cl_dx9_media_sharing.h directly.")
|
||||
|
|
@ -1,120 +0,0 @@
|
|||
/*******************************************************************************
|
||||
* Copyright (c) 2008-2020 The Khronos Group Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
******************************************************************************/
|
||||
|
||||
#ifndef __OPENCL_CL_EGL_H
|
||||
#define __OPENCL_CL_EGL_H
|
||||
|
||||
#include <CL/cl.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
/* Command type for events created with clEnqueueAcquireEGLObjectsKHR */
|
||||
#define CL_COMMAND_EGL_FENCE_SYNC_OBJECT_KHR 0x202F
|
||||
#define CL_COMMAND_ACQUIRE_EGL_OBJECTS_KHR 0x202D
|
||||
#define CL_COMMAND_RELEASE_EGL_OBJECTS_KHR 0x202E
|
||||
|
||||
/* Error type for clCreateFromEGLImageKHR */
|
||||
#define CL_INVALID_EGL_OBJECT_KHR -1093
|
||||
#define CL_EGL_RESOURCE_NOT_ACQUIRED_KHR -1092
|
||||
|
||||
/* CLeglImageKHR is an opaque handle to an EGLImage */
|
||||
typedef void* CLeglImageKHR;
|
||||
|
||||
/* CLeglDisplayKHR is an opaque handle to an EGLDisplay */
|
||||
typedef void* CLeglDisplayKHR;
|
||||
|
||||
/* CLeglSyncKHR is an opaque handle to an EGLSync object */
|
||||
typedef void* CLeglSyncKHR;
|
||||
|
||||
/* properties passed to clCreateFromEGLImageKHR */
|
||||
typedef intptr_t cl_egl_image_properties_khr;
|
||||
|
||||
|
||||
#define cl_khr_egl_image 1
|
||||
|
||||
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||
clCreateFromEGLImageKHR(cl_context context,
|
||||
CLeglDisplayKHR egldisplay,
|
||||
CLeglImageKHR eglimage,
|
||||
cl_mem_flags flags,
|
||||
const cl_egl_image_properties_khr * properties,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_mem (CL_API_CALL *clCreateFromEGLImageKHR_fn)(
|
||||
cl_context context,
|
||||
CLeglDisplayKHR egldisplay,
|
||||
CLeglImageKHR eglimage,
|
||||
cl_mem_flags flags,
|
||||
const cl_egl_image_properties_khr * properties,
|
||||
cl_int * errcode_ret);
|
||||
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueAcquireEGLObjectsKHR(cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_int (CL_API_CALL *clEnqueueAcquireEGLObjectsKHR_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event);
|
||||
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueReleaseEGLObjectsKHR(cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_int (CL_API_CALL *clEnqueueReleaseEGLObjectsKHR_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event);
|
||||
|
||||
|
||||
#define cl_khr_egl_event 1
|
||||
|
||||
extern CL_API_ENTRY cl_event CL_API_CALL
|
||||
clCreateEventFromEGLSyncKHR(cl_context context,
|
||||
CLeglSyncKHR sync,
|
||||
CLeglDisplayKHR display,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_event (CL_API_CALL *clCreateEventFromEGLSyncKHR_fn)(
|
||||
cl_context context,
|
||||
CLeglSyncKHR sync,
|
||||
CLeglDisplayKHR display,
|
||||
cl_int * errcode_ret);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __OPENCL_CL_EGL_H */
|
||||
2634
include/CL/cl_ext.h
2634
include/CL/cl_ext.h
File diff suppressed because it is too large
Load diff
|
|
@ -1,19 +0,0 @@
|
|||
/*******************************************************************************
|
||||
* Copyright (c) 2008-2020 The Khronos Group Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
******************************************************************************/
|
||||
|
||||
#include <CL/cl_ext.h>
|
||||
#pragma message("The Intel extensions have been moved into cl_ext.h. Please include cl_ext.h directly.")
|
||||
|
|
@ -1,194 +0,0 @@
|
|||
/*******************************************************************************
|
||||
* Copyright (c) 2008-2021 The Khronos Group Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
******************************************************************************/
|
||||
|
||||
#ifndef __OPENCL_CL_GL_H
|
||||
#define __OPENCL_CL_GL_H
|
||||
|
||||
#include <CL/cl.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef cl_uint cl_gl_object_type;
|
||||
typedef cl_uint cl_gl_texture_info;
|
||||
typedef cl_uint cl_gl_platform_info;
|
||||
typedef struct __GLsync *cl_GLsync;
|
||||
|
||||
/* cl_gl_object_type = 0x2000 - 0x200F enum values are currently taken */
|
||||
#define CL_GL_OBJECT_BUFFER 0x2000
|
||||
#define CL_GL_OBJECT_TEXTURE2D 0x2001
|
||||
#define CL_GL_OBJECT_TEXTURE3D 0x2002
|
||||
#define CL_GL_OBJECT_RENDERBUFFER 0x2003
|
||||
#ifdef CL_VERSION_1_2
|
||||
#define CL_GL_OBJECT_TEXTURE2D_ARRAY 0x200E
|
||||
#define CL_GL_OBJECT_TEXTURE1D 0x200F
|
||||
#define CL_GL_OBJECT_TEXTURE1D_ARRAY 0x2010
|
||||
#define CL_GL_OBJECT_TEXTURE_BUFFER 0x2011
|
||||
#endif
|
||||
|
||||
/* cl_gl_texture_info */
|
||||
#define CL_GL_TEXTURE_TARGET 0x2004
|
||||
#define CL_GL_MIPMAP_LEVEL 0x2005
|
||||
#ifdef CL_VERSION_1_2
|
||||
#define CL_GL_NUM_SAMPLES 0x2012
|
||||
#endif
|
||||
|
||||
|
||||
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||
clCreateFromGLBuffer(cl_context context,
|
||||
cl_mem_flags flags,
|
||||
cl_GLuint bufobj,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
#ifdef CL_VERSION_1_2
|
||||
|
||||
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||
clCreateFromGLTexture(cl_context context,
|
||||
cl_mem_flags flags,
|
||||
cl_GLenum target,
|
||||
cl_GLint miplevel,
|
||||
cl_GLuint texture,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
#endif
|
||||
|
||||
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||
clCreateFromGLRenderbuffer(cl_context context,
|
||||
cl_mem_flags flags,
|
||||
cl_GLuint renderbuffer,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetGLObjectInfo(cl_mem memobj,
|
||||
cl_gl_object_type * gl_object_type,
|
||||
cl_GLuint * gl_object_name) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetGLTextureInfo(cl_mem memobj,
|
||||
cl_gl_texture_info param_name,
|
||||
size_t param_value_size,
|
||||
void * param_value,
|
||||
size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueAcquireGLObjects(cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueReleaseGLObjects(cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
|
||||
/* Deprecated OpenCL 1.1 APIs */
|
||||
extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
|
||||
clCreateFromGLTexture2D(cl_context context,
|
||||
cl_mem_flags flags,
|
||||
cl_GLenum target,
|
||||
cl_GLint miplevel,
|
||||
cl_GLuint texture,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_1_DEPRECATED;
|
||||
|
||||
extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
|
||||
clCreateFromGLTexture3D(cl_context context,
|
||||
cl_mem_flags flags,
|
||||
cl_GLenum target,
|
||||
cl_GLint miplevel,
|
||||
cl_GLuint texture,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_1_DEPRECATED;
|
||||
|
||||
/* cl_khr_gl_sharing extension */
|
||||
|
||||
#define cl_khr_gl_sharing 1
|
||||
|
||||
typedef cl_uint cl_gl_context_info;
|
||||
|
||||
/* Additional Error Codes */
|
||||
#define CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR -1000
|
||||
|
||||
/* cl_gl_context_info */
|
||||
#define CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR 0x2006
|
||||
#define CL_DEVICES_FOR_GL_CONTEXT_KHR 0x2007
|
||||
|
||||
/* Additional cl_context_properties */
|
||||
#define CL_GL_CONTEXT_KHR 0x2008
|
||||
#define CL_EGL_DISPLAY_KHR 0x2009
|
||||
#define CL_GLX_DISPLAY_KHR 0x200A
|
||||
#define CL_WGL_HDC_KHR 0x200B
|
||||
#define CL_CGL_SHAREGROUP_KHR 0x200C
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetGLContextInfoKHR(const cl_context_properties * properties,
|
||||
cl_gl_context_info param_name,
|
||||
size_t param_value_size,
|
||||
void * param_value,
|
||||
size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)(
|
||||
const cl_context_properties * properties,
|
||||
cl_gl_context_info param_name,
|
||||
size_t param_value_size,
|
||||
void * param_value,
|
||||
size_t * param_value_size_ret);
|
||||
|
||||
/*
|
||||
* cl_khr_gl_event extension
|
||||
*/
|
||||
#define CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR 0x200D
|
||||
|
||||
extern CL_API_ENTRY cl_event CL_API_CALL
|
||||
clCreateEventFromGLsyncKHR(cl_context context,
|
||||
cl_GLsync sync,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_1;
|
||||
|
||||
/***************************************************************
|
||||
* cl_intel_sharing_format_query_gl
|
||||
***************************************************************/
|
||||
#define cl_intel_sharing_format_query_gl 1
|
||||
|
||||
/* when cl_khr_gl_sharing is supported */
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetSupportedGLTextureFormatsINTEL(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
cl_mem_object_type image_type,
|
||||
cl_uint num_entries,
|
||||
cl_GLenum* gl_formats,
|
||||
cl_uint* num_texture_formats) ;
|
||||
|
||||
typedef cl_int (CL_API_CALL *
|
||||
clGetSupportedGLTextureFormatsINTEL_fn)(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
cl_mem_object_type image_type,
|
||||
cl_uint num_entries,
|
||||
cl_GLenum* gl_formats,
|
||||
cl_uint* num_texture_formats) ;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __OPENCL_CL_GL_H */
|
||||
|
|
@ -1,18 +0,0 @@
|
|||
/*******************************************************************************
|
||||
* Copyright (c) 2008-2021 The Khronos Group Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
******************************************************************************/
|
||||
|
||||
#include <CL/cl_gl.h>
|
||||
#pragma message("All OpenGL-related extensions have been moved into cl_gl.h. Please include cl_gl.h directly.")
|
||||
|
|
@ -1,440 +0,0 @@
|
|||
/*******************************************************************************
|
||||
* Copyright (c) 2019-2020 The Khronos Group Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
******************************************************************************/
|
||||
|
||||
/**
|
||||
* This is a header-only utility library that provides OpenCL host code with
|
||||
* routines for converting to/from cl_half values.
|
||||
*
|
||||
* Example usage:
|
||||
*
|
||||
* #include <CL/cl_half.h>
|
||||
* ...
|
||||
* cl_half h = cl_half_from_float(0.5f, CL_HALF_RTE);
|
||||
* cl_float f = cl_half_to_float(h);
|
||||
*/
|
||||
|
||||
#ifndef OPENCL_CL_HALF_H
|
||||
#define OPENCL_CL_HALF_H
|
||||
|
||||
#include <CL/cl_platform.h>
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
/**
|
||||
* Rounding mode used when converting to cl_half.
|
||||
*/
|
||||
typedef enum
|
||||
{
|
||||
CL_HALF_RTE, // round to nearest even
|
||||
CL_HALF_RTZ, // round towards zero
|
||||
CL_HALF_RTP, // round towards positive infinity
|
||||
CL_HALF_RTN, // round towards negative infinity
|
||||
} cl_half_rounding_mode;
|
||||
|
||||
|
||||
/* Private utility macros. */
|
||||
#define CL_HALF_EXP_MASK 0x7C00
|
||||
#define CL_HALF_MAX_FINITE_MAG 0x7BFF
|
||||
|
||||
|
||||
/*
|
||||
* Utility to deal with values that overflow when converting to half precision.
|
||||
*/
|
||||
static inline cl_half cl_half_handle_overflow(cl_half_rounding_mode rounding_mode,
|
||||
uint16_t sign)
|
||||
{
|
||||
if (rounding_mode == CL_HALF_RTZ)
|
||||
{
|
||||
// Round overflow towards zero -> largest finite number (preserving sign)
|
||||
return (sign << 15) | CL_HALF_MAX_FINITE_MAG;
|
||||
}
|
||||
else if (rounding_mode == CL_HALF_RTP && sign)
|
||||
{
|
||||
// Round negative overflow towards positive infinity -> most negative finite number
|
||||
return (1 << 15) | CL_HALF_MAX_FINITE_MAG;
|
||||
}
|
||||
else if (rounding_mode == CL_HALF_RTN && !sign)
|
||||
{
|
||||
// Round positive overflow towards negative infinity -> largest finite number
|
||||
return CL_HALF_MAX_FINITE_MAG;
|
||||
}
|
||||
|
||||
// Overflow to infinity
|
||||
return (sign << 15) | CL_HALF_EXP_MASK;
|
||||
}
|
||||
|
||||
/*
|
||||
* Utility to deal with values that underflow when converting to half precision.
|
||||
*/
|
||||
static inline cl_half cl_half_handle_underflow(cl_half_rounding_mode rounding_mode,
|
||||
uint16_t sign)
|
||||
{
|
||||
if (rounding_mode == CL_HALF_RTP && !sign)
|
||||
{
|
||||
// Round underflow towards positive infinity -> smallest positive value
|
||||
return (sign << 15) | 1;
|
||||
}
|
||||
else if (rounding_mode == CL_HALF_RTN && sign)
|
||||
{
|
||||
// Round underflow towards negative infinity -> largest negative value
|
||||
return (sign << 15) | 1;
|
||||
}
|
||||
|
||||
// Flush to zero
|
||||
return (sign << 15);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Convert a cl_float to a cl_half.
|
||||
*/
|
||||
static inline cl_half cl_half_from_float(cl_float f, cl_half_rounding_mode rounding_mode)
|
||||
{
|
||||
// Type-punning to get direct access to underlying bits
|
||||
union
|
||||
{
|
||||
cl_float f;
|
||||
uint32_t i;
|
||||
} f32;
|
||||
f32.f = f;
|
||||
|
||||
// Extract sign bit
|
||||
uint16_t sign = f32.i >> 31;
|
||||
|
||||
// Extract FP32 exponent and mantissa
|
||||
uint32_t f_exp = (f32.i >> (CL_FLT_MANT_DIG - 1)) & 0xFF;
|
||||
uint32_t f_mant = f32.i & ((1 << (CL_FLT_MANT_DIG - 1)) - 1);
|
||||
|
||||
// Remove FP32 exponent bias
|
||||
int32_t exp = f_exp - CL_FLT_MAX_EXP + 1;
|
||||
|
||||
// Add FP16 exponent bias
|
||||
uint16_t h_exp = (uint16_t)(exp + CL_HALF_MAX_EXP - 1);
|
||||
|
||||
// Position of the bit that will become the FP16 mantissa LSB
|
||||
uint32_t lsb_pos = CL_FLT_MANT_DIG - CL_HALF_MANT_DIG;
|
||||
|
||||
// Check for NaN / infinity
|
||||
if (f_exp == 0xFF)
|
||||
{
|
||||
if (f_mant)
|
||||
{
|
||||
// NaN -> propagate mantissa and silence it
|
||||
uint16_t h_mant = (uint16_t)(f_mant >> lsb_pos);
|
||||
h_mant |= 0x200;
|
||||
return (sign << 15) | CL_HALF_EXP_MASK | h_mant;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Infinity -> zero mantissa
|
||||
return (sign << 15) | CL_HALF_EXP_MASK;
|
||||
}
|
||||
}
|
||||
|
||||
// Check for zero
|
||||
if (!f_exp && !f_mant)
|
||||
{
|
||||
return (sign << 15);
|
||||
}
|
||||
|
||||
// Check for overflow
|
||||
if (exp >= CL_HALF_MAX_EXP)
|
||||
{
|
||||
return cl_half_handle_overflow(rounding_mode, sign);
|
||||
}
|
||||
|
||||
// Check for underflow
|
||||
if (exp < (CL_HALF_MIN_EXP - CL_HALF_MANT_DIG - 1))
|
||||
{
|
||||
return cl_half_handle_underflow(rounding_mode, sign);
|
||||
}
|
||||
|
||||
// Check for value that will become denormal
|
||||
if (exp < -14)
|
||||
{
|
||||
// Denormal -> include the implicit 1 from the FP32 mantissa
|
||||
h_exp = 0;
|
||||
f_mant |= 1 << (CL_FLT_MANT_DIG - 1);
|
||||
|
||||
// Mantissa shift amount depends on exponent
|
||||
lsb_pos = -exp + (CL_FLT_MANT_DIG - 25);
|
||||
}
|
||||
|
||||
// Generate FP16 mantissa by shifting FP32 mantissa
|
||||
uint16_t h_mant = (uint16_t)(f_mant >> lsb_pos);
|
||||
|
||||
// Check whether we need to round
|
||||
uint32_t halfway = 1 << (lsb_pos - 1);
|
||||
uint32_t mask = (halfway << 1) - 1;
|
||||
switch (rounding_mode)
|
||||
{
|
||||
case CL_HALF_RTE:
|
||||
if ((f_mant & mask) > halfway)
|
||||
{
|
||||
// More than halfway -> round up
|
||||
h_mant += 1;
|
||||
}
|
||||
else if ((f_mant & mask) == halfway)
|
||||
{
|
||||
// Exactly halfway -> round to nearest even
|
||||
if (h_mant & 0x1)
|
||||
h_mant += 1;
|
||||
}
|
||||
break;
|
||||
case CL_HALF_RTZ:
|
||||
// Mantissa has already been truncated -> do nothing
|
||||
break;
|
||||
case CL_HALF_RTP:
|
||||
if ((f_mant & mask) && !sign)
|
||||
{
|
||||
// Round positive numbers up
|
||||
h_mant += 1;
|
||||
}
|
||||
break;
|
||||
case CL_HALF_RTN:
|
||||
if ((f_mant & mask) && sign)
|
||||
{
|
||||
// Round negative numbers down
|
||||
h_mant += 1;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// Check for mantissa overflow
|
||||
if (h_mant & 0x400)
|
||||
{
|
||||
h_exp += 1;
|
||||
h_mant = 0;
|
||||
}
|
||||
|
||||
return (sign << 15) | (h_exp << 10) | h_mant;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Convert a cl_double to a cl_half.
|
||||
*/
|
||||
static inline cl_half cl_half_from_double(cl_double d, cl_half_rounding_mode rounding_mode)
|
||||
{
|
||||
// Type-punning to get direct access to underlying bits
|
||||
union
|
||||
{
|
||||
cl_double d;
|
||||
uint64_t i;
|
||||
} f64;
|
||||
f64.d = d;
|
||||
|
||||
// Extract sign bit
|
||||
uint16_t sign = f64.i >> 63;
|
||||
|
||||
// Extract FP64 exponent and mantissa
|
||||
uint64_t d_exp = (f64.i >> (CL_DBL_MANT_DIG - 1)) & 0x7FF;
|
||||
uint64_t d_mant = f64.i & (((uint64_t)1 << (CL_DBL_MANT_DIG - 1)) - 1);
|
||||
|
||||
// Remove FP64 exponent bias
|
||||
int64_t exp = d_exp - CL_DBL_MAX_EXP + 1;
|
||||
|
||||
// Add FP16 exponent bias
|
||||
uint16_t h_exp = (uint16_t)(exp + CL_HALF_MAX_EXP - 1);
|
||||
|
||||
// Position of the bit that will become the FP16 mantissa LSB
|
||||
uint32_t lsb_pos = CL_DBL_MANT_DIG - CL_HALF_MANT_DIG;
|
||||
|
||||
// Check for NaN / infinity
|
||||
if (d_exp == 0x7FF)
|
||||
{
|
||||
if (d_mant)
|
||||
{
|
||||
// NaN -> propagate mantissa and silence it
|
||||
uint16_t h_mant = (uint16_t)(d_mant >> lsb_pos);
|
||||
h_mant |= 0x200;
|
||||
return (sign << 15) | CL_HALF_EXP_MASK | h_mant;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Infinity -> zero mantissa
|
||||
return (sign << 15) | CL_HALF_EXP_MASK;
|
||||
}
|
||||
}
|
||||
|
||||
// Check for zero
|
||||
if (!d_exp && !d_mant)
|
||||
{
|
||||
return (sign << 15);
|
||||
}
|
||||
|
||||
// Check for overflow
|
||||
if (exp >= CL_HALF_MAX_EXP)
|
||||
{
|
||||
return cl_half_handle_overflow(rounding_mode, sign);
|
||||
}
|
||||
|
||||
// Check for underflow
|
||||
if (exp < (CL_HALF_MIN_EXP - CL_HALF_MANT_DIG - 1))
|
||||
{
|
||||
return cl_half_handle_underflow(rounding_mode, sign);
|
||||
}
|
||||
|
||||
// Check for value that will become denormal
|
||||
if (exp < -14)
|
||||
{
|
||||
// Include the implicit 1 from the FP64 mantissa
|
||||
h_exp = 0;
|
||||
d_mant |= (uint64_t)1 << (CL_DBL_MANT_DIG - 1);
|
||||
|
||||
// Mantissa shift amount depends on exponent
|
||||
lsb_pos = (uint32_t)(-exp + (CL_DBL_MANT_DIG - 25));
|
||||
}
|
||||
|
||||
// Generate FP16 mantissa by shifting FP64 mantissa
|
||||
uint16_t h_mant = (uint16_t)(d_mant >> lsb_pos);
|
||||
|
||||
// Check whether we need to round
|
||||
uint64_t halfway = (uint64_t)1 << (lsb_pos - 1);
|
||||
uint64_t mask = (halfway << 1) - 1;
|
||||
switch (rounding_mode)
|
||||
{
|
||||
case CL_HALF_RTE:
|
||||
if ((d_mant & mask) > halfway)
|
||||
{
|
||||
// More than halfway -> round up
|
||||
h_mant += 1;
|
||||
}
|
||||
else if ((d_mant & mask) == halfway)
|
||||
{
|
||||
// Exactly halfway -> round to nearest even
|
||||
if (h_mant & 0x1)
|
||||
h_mant += 1;
|
||||
}
|
||||
break;
|
||||
case CL_HALF_RTZ:
|
||||
// Mantissa has already been truncated -> do nothing
|
||||
break;
|
||||
case CL_HALF_RTP:
|
||||
if ((d_mant & mask) && !sign)
|
||||
{
|
||||
// Round positive numbers up
|
||||
h_mant += 1;
|
||||
}
|
||||
break;
|
||||
case CL_HALF_RTN:
|
||||
if ((d_mant & mask) && sign)
|
||||
{
|
||||
// Round negative numbers down
|
||||
h_mant += 1;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// Check for mantissa overflow
|
||||
if (h_mant & 0x400)
|
||||
{
|
||||
h_exp += 1;
|
||||
h_mant = 0;
|
||||
}
|
||||
|
||||
return (sign << 15) | (h_exp << 10) | h_mant;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Convert a cl_half to a cl_float.
|
||||
*/
|
||||
static inline cl_float cl_half_to_float(cl_half h)
|
||||
{
|
||||
// Type-punning to get direct access to underlying bits
|
||||
union
|
||||
{
|
||||
cl_float f;
|
||||
uint32_t i;
|
||||
} f32;
|
||||
|
||||
// Extract sign bit
|
||||
uint16_t sign = h >> 15;
|
||||
|
||||
// Extract FP16 exponent and mantissa
|
||||
uint16_t h_exp = (h >> (CL_HALF_MANT_DIG - 1)) & 0x1F;
|
||||
uint16_t h_mant = h & 0x3FF;
|
||||
|
||||
// Remove FP16 exponent bias
|
||||
int32_t exp = h_exp - CL_HALF_MAX_EXP + 1;
|
||||
|
||||
// Add FP32 exponent bias
|
||||
uint32_t f_exp = exp + CL_FLT_MAX_EXP - 1;
|
||||
|
||||
// Check for NaN / infinity
|
||||
if (h_exp == 0x1F)
|
||||
{
|
||||
if (h_mant)
|
||||
{
|
||||
// NaN -> propagate mantissa and silence it
|
||||
uint32_t f_mant = h_mant << (CL_FLT_MANT_DIG - CL_HALF_MANT_DIG);
|
||||
f_mant |= 0x400000;
|
||||
f32.i = (sign << 31) | 0x7F800000 | f_mant;
|
||||
return f32.f;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Infinity -> zero mantissa
|
||||
f32.i = (sign << 31) | 0x7F800000;
|
||||
return f32.f;
|
||||
}
|
||||
}
|
||||
|
||||
// Check for zero / denormal
|
||||
if (h_exp == 0)
|
||||
{
|
||||
if (h_mant == 0)
|
||||
{
|
||||
// Zero -> zero exponent
|
||||
f_exp = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Denormal -> normalize it
|
||||
// - Shift mantissa to make most-significant 1 implicit
|
||||
// - Adjust exponent accordingly
|
||||
uint32_t shift = 0;
|
||||
while ((h_mant & 0x400) == 0)
|
||||
{
|
||||
h_mant <<= 1;
|
||||
shift++;
|
||||
}
|
||||
h_mant &= 0x3FF;
|
||||
f_exp -= shift - 1;
|
||||
}
|
||||
}
|
||||
|
||||
f32.i = (sign << 31) | (f_exp << 23) | (h_mant << 13);
|
||||
return f32.f;
|
||||
}
|
||||
|
||||
|
||||
#undef CL_HALF_EXP_MASK
|
||||
#undef CL_HALF_MAX_FINITE_MAG
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#endif /* OPENCL_CL_HALF_H */
|
||||
1294
include/CL/cl_icd.h
1294
include/CL/cl_icd.h
File diff suppressed because it is too large
Load diff
|
|
@ -1,62 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2020 The Khronos Group Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* OpenCL is a trademark of Apple Inc. used under license by Khronos.
|
||||
*/
|
||||
|
||||
#ifndef OPENCL_CL_LAYER_H
|
||||
#define OPENCL_CL_LAYER_H
|
||||
|
||||
#include <CL/cl_icd.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef cl_uint cl_layer_info;
|
||||
typedef cl_uint cl_layer_api_version;
|
||||
#define CL_LAYER_API_VERSION 0x4240
|
||||
#define CL_LAYER_NAME 0x4241
|
||||
#define CL_LAYER_API_VERSION_100 100
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetLayerInfo(cl_layer_info param_name,
|
||||
size_t param_value_size,
|
||||
void *param_value,
|
||||
size_t *param_value_size_ret);
|
||||
|
||||
typedef cl_int
|
||||
(CL_API_CALL *pfn_clGetLayerInfo)(cl_layer_info param_name,
|
||||
size_t param_value_size,
|
||||
void *param_value,
|
||||
size_t *param_value_size_ret);
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clInitLayer(cl_uint num_entries,
|
||||
const cl_icd_dispatch *target_dispatch,
|
||||
cl_uint *num_entries_ret,
|
||||
const cl_icd_dispatch **layer_dispatch_ret);
|
||||
|
||||
typedef cl_int
|
||||
(CL_API_CALL *pfn_clInitLayer)(cl_uint num_entries,
|
||||
const cl_icd_dispatch *target_dispatch,
|
||||
cl_uint *num_entries_ret,
|
||||
const cl_icd_dispatch **layer_dispatch_ret);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* OPENCL_CL_LAYER_H */
|
||||
File diff suppressed because it is too large
Load diff
|
|
@ -1,163 +0,0 @@
|
|||
/*******************************************************************************
|
||||
* Copyright (c) 2008-2020 The Khronos Group Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
******************************************************************************/
|
||||
|
||||
#ifndef __OPENCL_CL_VA_API_MEDIA_SHARING_INTEL_H
|
||||
#define __OPENCL_CL_VA_API_MEDIA_SHARING_INTEL_H
|
||||
|
||||
#include <CL/cl.h>
|
||||
#include <CL/cl_platform.h>
|
||||
#include <va/va.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/***************************************************************
|
||||
* cl_intel_sharing_format_query_va_api
|
||||
***************************************************************/
|
||||
#define cl_intel_sharing_format_query_va_api 1
|
||||
|
||||
/* when cl_intel_va_api_media_sharing is supported */
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetSupportedVA_APIMediaSurfaceFormatsINTEL(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
cl_mem_object_type image_type,
|
||||
cl_uint plane,
|
||||
cl_uint num_entries,
|
||||
VAImageFormat* va_api_formats,
|
||||
cl_uint* num_surface_formats) ;
|
||||
|
||||
typedef cl_int (CL_API_CALL *
|
||||
clGetSupportedVA_APIMediaSurfaceFormatsINTEL_fn)(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
cl_mem_object_type image_type,
|
||||
cl_uint plane,
|
||||
cl_uint num_entries,
|
||||
VAImageFormat* va_api_formats,
|
||||
cl_uint* num_surface_formats) ;
|
||||
|
||||
/******************************************
|
||||
* cl_intel_va_api_media_sharing extension *
|
||||
*******************************************/
|
||||
|
||||
#define cl_intel_va_api_media_sharing 1
|
||||
|
||||
/* error codes */
|
||||
#define CL_INVALID_VA_API_MEDIA_ADAPTER_INTEL -1098
|
||||
#define CL_INVALID_VA_API_MEDIA_SURFACE_INTEL -1099
|
||||
#define CL_VA_API_MEDIA_SURFACE_ALREADY_ACQUIRED_INTEL -1100
|
||||
#define CL_VA_API_MEDIA_SURFACE_NOT_ACQUIRED_INTEL -1101
|
||||
|
||||
/* cl_va_api_device_source_intel */
|
||||
#define CL_VA_API_DISPLAY_INTEL 0x4094
|
||||
|
||||
/* cl_va_api_device_set_intel */
|
||||
#define CL_PREFERRED_DEVICES_FOR_VA_API_INTEL 0x4095
|
||||
#define CL_ALL_DEVICES_FOR_VA_API_INTEL 0x4096
|
||||
|
||||
/* cl_context_info */
|
||||
#define CL_CONTEXT_VA_API_DISPLAY_INTEL 0x4097
|
||||
|
||||
/* cl_mem_info */
|
||||
#define CL_MEM_VA_API_MEDIA_SURFACE_INTEL 0x4098
|
||||
|
||||
/* cl_image_info */
|
||||
#define CL_IMAGE_VA_API_PLANE_INTEL 0x4099
|
||||
|
||||
/* cl_command_type */
|
||||
#define CL_COMMAND_ACQUIRE_VA_API_MEDIA_SURFACES_INTEL 0x409A
|
||||
#define CL_COMMAND_RELEASE_VA_API_MEDIA_SURFACES_INTEL 0x409B
|
||||
|
||||
typedef cl_uint cl_va_api_device_source_intel;
|
||||
typedef cl_uint cl_va_api_device_set_intel;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetDeviceIDsFromVA_APIMediaAdapterINTEL(
|
||||
cl_platform_id platform,
|
||||
cl_va_api_device_source_intel media_adapter_type,
|
||||
void* media_adapter,
|
||||
cl_va_api_device_set_intel media_adapter_set,
|
||||
cl_uint num_entries,
|
||||
cl_device_id* devices,
|
||||
cl_uint* num_devices) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef cl_int (CL_API_CALL * clGetDeviceIDsFromVA_APIMediaAdapterINTEL_fn)(
|
||||
cl_platform_id platform,
|
||||
cl_va_api_device_source_intel media_adapter_type,
|
||||
void* media_adapter,
|
||||
cl_va_api_device_set_intel media_adapter_set,
|
||||
cl_uint num_entries,
|
||||
cl_device_id* devices,
|
||||
cl_uint* num_devices) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||
clCreateFromVA_APIMediaSurfaceINTEL(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
VASurfaceID* surface,
|
||||
cl_uint plane,
|
||||
cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef cl_mem (CL_API_CALL * clCreateFromVA_APIMediaSurfaceINTEL_fn)(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
VASurfaceID* surface,
|
||||
cl_uint plane,
|
||||
cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueAcquireVA_APIMediaSurfacesINTEL(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem* mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event* event_wait_list,
|
||||
cl_event* event) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef cl_int (CL_API_CALL *clEnqueueAcquireVA_APIMediaSurfacesINTEL_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem* mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event* event_wait_list,
|
||||
cl_event* event) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueReleaseVA_APIMediaSurfacesINTEL(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem* mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event* event_wait_list,
|
||||
cl_event* event) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef cl_int (CL_API_CALL *clEnqueueReleaseVA_APIMediaSurfacesINTEL_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem* mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event* event_wait_list,
|
||||
cl_event* event) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __OPENCL_CL_VA_API_MEDIA_SHARING_INTEL_H */
|
||||
|
||||
|
|
@ -1,81 +0,0 @@
|
|||
/*******************************************************************************
|
||||
* Copyright (c) 2018-2020 The Khronos Group Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
******************************************************************************/
|
||||
|
||||
#ifndef __CL_VERSION_H
|
||||
#define __CL_VERSION_H
|
||||
|
||||
/* Detect which version to target */
|
||||
#if !defined(CL_TARGET_OPENCL_VERSION)
|
||||
#pragma message("cl_version.h: CL_TARGET_OPENCL_VERSION is not defined. Defaulting to 300 (OpenCL 3.0)")
|
||||
#define CL_TARGET_OPENCL_VERSION 300
|
||||
#endif
|
||||
#if CL_TARGET_OPENCL_VERSION != 100 && \
|
||||
CL_TARGET_OPENCL_VERSION != 110 && \
|
||||
CL_TARGET_OPENCL_VERSION != 120 && \
|
||||
CL_TARGET_OPENCL_VERSION != 200 && \
|
||||
CL_TARGET_OPENCL_VERSION != 210 && \
|
||||
CL_TARGET_OPENCL_VERSION != 220 && \
|
||||
CL_TARGET_OPENCL_VERSION != 300
|
||||
#pragma message("cl_version: CL_TARGET_OPENCL_VERSION is not a valid value (100, 110, 120, 200, 210, 220, 300). Defaulting to 300 (OpenCL 3.0)")
|
||||
#undef CL_TARGET_OPENCL_VERSION
|
||||
#define CL_TARGET_OPENCL_VERSION 300
|
||||
#endif
|
||||
|
||||
|
||||
/* OpenCL Version */
|
||||
#if CL_TARGET_OPENCL_VERSION >= 300 && !defined(CL_VERSION_3_0)
|
||||
#define CL_VERSION_3_0 1
|
||||
#endif
|
||||
#if CL_TARGET_OPENCL_VERSION >= 220 && !defined(CL_VERSION_2_2)
|
||||
#define CL_VERSION_2_2 1
|
||||
#endif
|
||||
#if CL_TARGET_OPENCL_VERSION >= 210 && !defined(CL_VERSION_2_1)
|
||||
#define CL_VERSION_2_1 1
|
||||
#endif
|
||||
#if CL_TARGET_OPENCL_VERSION >= 200 && !defined(CL_VERSION_2_0)
|
||||
#define CL_VERSION_2_0 1
|
||||
#endif
|
||||
#if CL_TARGET_OPENCL_VERSION >= 120 && !defined(CL_VERSION_1_2)
|
||||
#define CL_VERSION_1_2 1
|
||||
#endif
|
||||
#if CL_TARGET_OPENCL_VERSION >= 110 && !defined(CL_VERSION_1_1)
|
||||
#define CL_VERSION_1_1 1
|
||||
#endif
|
||||
#if CL_TARGET_OPENCL_VERSION >= 100 && !defined(CL_VERSION_1_0)
|
||||
#define CL_VERSION_1_0 1
|
||||
#endif
|
||||
|
||||
/* Allow deprecated APIs for older OpenCL versions. */
|
||||
#if CL_TARGET_OPENCL_VERSION <= 220 && !defined(CL_USE_DEPRECATED_OPENCL_2_2_APIS)
|
||||
#define CL_USE_DEPRECATED_OPENCL_2_2_APIS
|
||||
#endif
|
||||
#if CL_TARGET_OPENCL_VERSION <= 210 && !defined(CL_USE_DEPRECATED_OPENCL_2_1_APIS)
|
||||
#define CL_USE_DEPRECATED_OPENCL_2_1_APIS
|
||||
#endif
|
||||
#if CL_TARGET_OPENCL_VERSION <= 200 && !defined(CL_USE_DEPRECATED_OPENCL_2_0_APIS)
|
||||
#define CL_USE_DEPRECATED_OPENCL_2_0_APIS
|
||||
#endif
|
||||
#if CL_TARGET_OPENCL_VERSION <= 120 && !defined(CL_USE_DEPRECATED_OPENCL_1_2_APIS)
|
||||
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
|
||||
#endif
|
||||
#if CL_TARGET_OPENCL_VERSION <= 110 && !defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS)
|
||||
#define CL_USE_DEPRECATED_OPENCL_1_1_APIS
|
||||
#endif
|
||||
#if CL_TARGET_OPENCL_VERSION <= 100 && !defined(CL_USE_DEPRECATED_OPENCL_1_0_APIS)
|
||||
#define CL_USE_DEPRECATED_OPENCL_1_0_APIS
|
||||
#endif
|
||||
|
||||
#endif /* __CL_VERSION_H */
|
||||
|
|
@ -1,201 +0,0 @@
|
|||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
|
@ -1,32 +0,0 @@
|
|||
/*******************************************************************************
|
||||
* Copyright (c) 2008-2021 The Khronos Group Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
******************************************************************************/
|
||||
|
||||
#ifndef __OPENCL_H
|
||||
#define __OPENCL_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <CL/cl.h>
|
||||
#include <CL/cl_gl.h>
|
||||
#include <CL/cl_ext.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __OPENCL_H */
|
||||
10372
include/CL/opencl.hpp
10372
include/CL/opencl.hpp
File diff suppressed because it is too large
Load diff
413
include/cblas.h
413
include/cblas.h
|
|
@ -1,413 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#ifndef CBLAS_H
|
||||
#define CBLAS_H
|
||||
|
||||
#include <stddef.h>
|
||||
#include <openblas_config.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
/* Assume C declarations for C++ */
|
||||
#endif /* __cplusplus */
|
||||
|
||||
/*Set the number of threads on runtime.*/
|
||||
void openblas_set_num_threads(int num_threads);
|
||||
void goto_set_num_threads(int num_threads);
|
||||
|
||||
/*Get the number of threads on runtime.*/
|
||||
int openblas_get_num_threads(void);
|
||||
|
||||
/*Get the number of physical processors (cores).*/
|
||||
int openblas_get_num_procs(void);
|
||||
|
||||
/*Get the build configure on runtime.*/
|
||||
char* openblas_get_config(void);
|
||||
|
||||
/*Get the CPU corename on runtime.*/
|
||||
char* openblas_get_corename(void);
|
||||
|
||||
#ifdef OPENBLAS_OS_LINUX
|
||||
/* Sets thread affinity for OpenBLAS threads. `thread_idx` is in [0, openblas_get_num_threads()-1]. */
|
||||
int openblas_setaffinity(int thread_idx, size_t cpusetsize, cpu_set_t* cpu_set);
|
||||
/* Queries thread affinity for OpenBLAS threads. `thread_idx` is in [0, openblas_get_num_threads()-1]. */
|
||||
int openblas_getaffinity(int thread_idx, size_t cpusetsize, cpu_set_t* cpu_set);
|
||||
#endif
|
||||
|
||||
/* Get the parallelization type which is used by OpenBLAS */
|
||||
int openblas_get_parallel(void);
|
||||
/* OpenBLAS is compiled for sequential use */
|
||||
#define OPENBLAS_SEQUENTIAL 0
|
||||
/* OpenBLAS is compiled using normal threading model */
|
||||
#define OPENBLAS_THREAD 1
|
||||
/* OpenBLAS is compiled using OpenMP threading model */
|
||||
#define OPENBLAS_OPENMP 2
|
||||
|
||||
|
||||
/*
|
||||
* Since all of GotoBlas was written without const,
|
||||
* we disable it at build time.
|
||||
*/
|
||||
#ifndef OPENBLAS_CONST
|
||||
# define OPENBLAS_CONST const
|
||||
#endif
|
||||
|
||||
|
||||
#define CBLAS_INDEX size_t
|
||||
|
||||
typedef enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102} CBLAS_ORDER;
|
||||
typedef enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113, CblasConjNoTrans=114} CBLAS_TRANSPOSE;
|
||||
typedef enum CBLAS_UPLO {CblasUpper=121, CblasLower=122} CBLAS_UPLO;
|
||||
typedef enum CBLAS_DIAG {CblasNonUnit=131, CblasUnit=132} CBLAS_DIAG;
|
||||
typedef enum CBLAS_SIDE {CblasLeft=141, CblasRight=142} CBLAS_SIDE;
|
||||
typedef CBLAS_ORDER CBLAS_LAYOUT;
|
||||
|
||||
float cblas_sdsdot(OPENBLAS_CONST blasint n, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float *y, OPENBLAS_CONST blasint incy);
|
||||
double cblas_dsdot (OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float *y, OPENBLAS_CONST blasint incy);
|
||||
float cblas_sdot(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float *y, OPENBLAS_CONST blasint incy);
|
||||
double cblas_ddot(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST double *y, OPENBLAS_CONST blasint incy);
|
||||
|
||||
openblas_complex_float cblas_cdotu(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *y, OPENBLAS_CONST blasint incy);
|
||||
openblas_complex_float cblas_cdotc(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *y, OPENBLAS_CONST blasint incy);
|
||||
openblas_complex_double cblas_zdotu(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *y, OPENBLAS_CONST blasint incy);
|
||||
openblas_complex_double cblas_zdotc(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *y, OPENBLAS_CONST blasint incy);
|
||||
|
||||
void cblas_cdotu_sub(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *y, OPENBLAS_CONST blasint incy, void *ret);
|
||||
void cblas_cdotc_sub(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *y, OPENBLAS_CONST blasint incy, void *ret);
|
||||
void cblas_zdotu_sub(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *y, OPENBLAS_CONST blasint incy, void *ret);
|
||||
void cblas_zdotc_sub(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *y, OPENBLAS_CONST blasint incy, void *ret);
|
||||
|
||||
float cblas_sasum (OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx);
|
||||
double cblas_dasum (OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx);
|
||||
float cblas_scasum(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
|
||||
double cblas_dzasum(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
|
||||
|
||||
float cblas_ssum (OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx);
|
||||
double cblas_dsum (OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx);
|
||||
float cblas_scsum(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
|
||||
double cblas_dzsum(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
|
||||
|
||||
float cblas_snrm2 (OPENBLAS_CONST blasint N, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX);
|
||||
double cblas_dnrm2 (OPENBLAS_CONST blasint N, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX);
|
||||
float cblas_scnrm2(OPENBLAS_CONST blasint N, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX);
|
||||
double cblas_dznrm2(OPENBLAS_CONST blasint N, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX);
|
||||
|
||||
CBLAS_INDEX cblas_isamax(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx);
|
||||
CBLAS_INDEX cblas_idamax(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx);
|
||||
CBLAS_INDEX cblas_icamax(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
|
||||
CBLAS_INDEX cblas_izamax(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
|
||||
|
||||
CBLAS_INDEX cblas_isamin(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx);
|
||||
CBLAS_INDEX cblas_idamin(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx);
|
||||
CBLAS_INDEX cblas_icamin(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
|
||||
CBLAS_INDEX cblas_izamin(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
|
||||
|
||||
CBLAS_INDEX cblas_ismax(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx);
|
||||
CBLAS_INDEX cblas_idmax(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx);
|
||||
CBLAS_INDEX cblas_icmax(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
|
||||
CBLAS_INDEX cblas_izmax(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
|
||||
|
||||
CBLAS_INDEX cblas_ismin(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx);
|
||||
CBLAS_INDEX cblas_idmin(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx);
|
||||
CBLAS_INDEX cblas_icmin(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
|
||||
CBLAS_INDEX cblas_izmin(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
|
||||
|
||||
void cblas_saxpy(OPENBLAS_CONST blasint n, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, float *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_daxpy(OPENBLAS_CONST blasint n, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, double *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_caxpy(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_zaxpy(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incy);
|
||||
|
||||
void cblas_scopy(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, float *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_dcopy(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, double *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_ccopy(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_zcopy(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incy);
|
||||
|
||||
void cblas_sswap(OPENBLAS_CONST blasint n, float *x, OPENBLAS_CONST blasint incx, float *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_dswap(OPENBLAS_CONST blasint n, double *x, OPENBLAS_CONST blasint incx, double *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_cswap(OPENBLAS_CONST blasint n, void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_zswap(OPENBLAS_CONST blasint n, void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incy);
|
||||
|
||||
void cblas_srot(OPENBLAS_CONST blasint N, float *X, OPENBLAS_CONST blasint incX, float *Y, OPENBLAS_CONST blasint incY, OPENBLAS_CONST float c, OPENBLAS_CONST float s);
|
||||
void cblas_drot(OPENBLAS_CONST blasint N, double *X, OPENBLAS_CONST blasint incX, double *Y, OPENBLAS_CONST blasint incY, OPENBLAS_CONST double c, OPENBLAS_CONST double s);
|
||||
void cblas_csrot(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incY, OPENBLAS_CONST float c, OPENBLAS_CONST float s);
|
||||
void cblas_zdrot(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incY, OPENBLAS_CONST double c, OPENBLAS_CONST double s);
|
||||
|
||||
void cblas_srotg(float *a, float *b, float *c, float *s);
|
||||
void cblas_drotg(double *a, double *b, double *c, double *s);
|
||||
void cblas_crotg(void *a, void *b, float *c, void *s);
|
||||
void cblas_zrotg(void *a, void *b, double *c, void *s);
|
||||
|
||||
|
||||
void cblas_srotm(OPENBLAS_CONST blasint N, float *X, OPENBLAS_CONST blasint incX, float *Y, OPENBLAS_CONST blasint incY, OPENBLAS_CONST float *P);
|
||||
void cblas_drotm(OPENBLAS_CONST blasint N, double *X, OPENBLAS_CONST blasint incX, double *Y, OPENBLAS_CONST blasint incY, OPENBLAS_CONST double *P);
|
||||
|
||||
void cblas_srotmg(float *d1, float *d2, float *b1, OPENBLAS_CONST float b2, float *P);
|
||||
void cblas_drotmg(double *d1, double *d2, double *b1, OPENBLAS_CONST double b2, double *P);
|
||||
|
||||
void cblas_sscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_dscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, double *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_cscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, void *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_zscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, void *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_csscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, void *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_zdscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, void *X, OPENBLAS_CONST blasint incX);
|
||||
|
||||
void cblas_sgemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE trans, OPENBLAS_CONST blasint m, OPENBLAS_CONST blasint n,
|
||||
OPENBLAS_CONST float alpha, OPENBLAS_CONST float *a, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float beta, float *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_dgemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE trans, OPENBLAS_CONST blasint m, OPENBLAS_CONST blasint n,
|
||||
OPENBLAS_CONST double alpha, OPENBLAS_CONST double *a, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST double beta, double *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_cgemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE trans, OPENBLAS_CONST blasint m, OPENBLAS_CONST blasint n,
|
||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *a, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *beta, void *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_zgemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE trans, OPENBLAS_CONST blasint m, OPENBLAS_CONST blasint n,
|
||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *a, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *beta, void *y, OPENBLAS_CONST blasint incy);
|
||||
|
||||
void cblas_sger (OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *Y, OPENBLAS_CONST blasint incY, float *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_dger (OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *Y, OPENBLAS_CONST blasint incY, double *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_cgeru(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *Y, OPENBLAS_CONST blasint incY, void *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_cgerc(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *Y, OPENBLAS_CONST blasint incY, void *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_zgeru(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *Y, OPENBLAS_CONST blasint incY, void *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_zgerc(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *Y, OPENBLAS_CONST blasint incY, void *A, OPENBLAS_CONST blasint lda);
|
||||
|
||||
void cblas_strsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_dtrsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ctrsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ztrsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *X, OPENBLAS_CONST blasint incX);
|
||||
|
||||
void cblas_strmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_dtrmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ctrmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ztrmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *X, OPENBLAS_CONST blasint incX);
|
||||
|
||||
void cblas_ssyr(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, float *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_dsyr(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, double *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_cher(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, void *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_zher(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, void *A, OPENBLAS_CONST blasint lda);
|
||||
|
||||
void cblas_ssyr2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo,OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *X,
|
||||
OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *Y, OPENBLAS_CONST blasint incY, float *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_dsyr2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *X,
|
||||
OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *Y, OPENBLAS_CONST blasint incY, double *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_cher2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX,
|
||||
OPENBLAS_CONST void *Y, OPENBLAS_CONST blasint incY, void *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_zher2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX,
|
||||
OPENBLAS_CONST void *Y, OPENBLAS_CONST blasint incY, void *A, OPENBLAS_CONST blasint lda);
|
||||
|
||||
void cblas_sgbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST blasint KL, OPENBLAS_CONST blasint KU, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float beta, float *Y, OPENBLAS_CONST blasint incY);
|
||||
void cblas_dgbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST blasint KL, OPENBLAS_CONST blasint KU, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double beta, double *Y, OPENBLAS_CONST blasint incY);
|
||||
void cblas_cgbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST blasint KL, OPENBLAS_CONST blasint KU, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *beta, void *Y, OPENBLAS_CONST blasint incY);
|
||||
void cblas_zgbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST blasint KL, OPENBLAS_CONST blasint KU, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *beta, void *Y, OPENBLAS_CONST blasint incY);
|
||||
|
||||
void cblas_ssbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A,
|
||||
OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float beta, float *Y, OPENBLAS_CONST blasint incY);
|
||||
void cblas_dsbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A,
|
||||
OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double beta, double *Y, OPENBLAS_CONST blasint incY);
|
||||
|
||||
|
||||
void cblas_stbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_dtbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ctbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ztbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *X, OPENBLAS_CONST blasint incX);
|
||||
|
||||
void cblas_stbsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_dtbsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ctbsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ztbsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *X, OPENBLAS_CONST blasint incX);
|
||||
|
||||
void cblas_stpmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST float *Ap, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_dtpmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST double *Ap, double *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ctpmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST void *Ap, void *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ztpmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST void *Ap, void *X, OPENBLAS_CONST blasint incX);
|
||||
|
||||
void cblas_stpsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST float *Ap, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_dtpsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST double *Ap, double *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ctpsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST void *Ap, void *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ztpsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST void *Ap, void *X, OPENBLAS_CONST blasint incX);
|
||||
|
||||
void cblas_ssymv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A,
|
||||
OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float beta, float *Y, OPENBLAS_CONST blasint incY);
|
||||
void cblas_dsymv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A,
|
||||
OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double beta, double *Y, OPENBLAS_CONST blasint incY);
|
||||
void cblas_chemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A,
|
||||
OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *beta, void *Y, OPENBLAS_CONST blasint incY);
|
||||
void cblas_zhemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A,
|
||||
OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *beta, void *Y, OPENBLAS_CONST blasint incY);
|
||||
|
||||
|
||||
void cblas_sspmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *Ap,
|
||||
OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float beta, float *Y, OPENBLAS_CONST blasint incY);
|
||||
void cblas_dspmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *Ap,
|
||||
OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double beta, double *Y, OPENBLAS_CONST blasint incY);
|
||||
|
||||
void cblas_sspr(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, float *Ap);
|
||||
void cblas_dspr(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, double *Ap);
|
||||
|
||||
void cblas_chpr(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, void *A);
|
||||
void cblas_zhpr(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST void *X,OPENBLAS_CONST blasint incX, void *A);
|
||||
|
||||
void cblas_sspr2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *Y, OPENBLAS_CONST blasint incY, float *A);
|
||||
void cblas_dspr2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *Y, OPENBLAS_CONST blasint incY, double *A);
|
||||
void cblas_chpr2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *Y, OPENBLAS_CONST blasint incY, void *Ap);
|
||||
void cblas_zhpr2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *Y, OPENBLAS_CONST blasint incY, void *Ap);
|
||||
|
||||
void cblas_chbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *beta, void *Y, OPENBLAS_CONST blasint incY);
|
||||
void cblas_zhbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *beta, void *Y, OPENBLAS_CONST blasint incY);
|
||||
|
||||
void cblas_chpmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *Ap, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *beta, void *Y, OPENBLAS_CONST blasint incY);
|
||||
void cblas_zhpmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *Ap, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *beta, void *Y, OPENBLAS_CONST blasint incY);
|
||||
|
||||
void cblas_sgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_dgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_cgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_cgemm3m(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_zgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_zgemm3m(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc);
|
||||
|
||||
|
||||
void cblas_ssymm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_dsymm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_csymm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_zsymm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc);
|
||||
|
||||
void cblas_ssyrk(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_dsyrk(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_csyrk(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_zsyrk(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc);
|
||||
|
||||
void cblas_ssyr2k(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_dsyr2k(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_csyr2k(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_zsyr2k(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc);
|
||||
|
||||
void cblas_strmm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA,
|
||||
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *B, OPENBLAS_CONST blasint ldb);
|
||||
void cblas_dtrmm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA,
|
||||
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *B, OPENBLAS_CONST blasint ldb);
|
||||
void cblas_ctrmm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA,
|
||||
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *B, OPENBLAS_CONST blasint ldb);
|
||||
void cblas_ztrmm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA,
|
||||
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *B, OPENBLAS_CONST blasint ldb);
|
||||
|
||||
void cblas_strsm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA,
|
||||
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *B, OPENBLAS_CONST blasint ldb);
|
||||
void cblas_dtrsm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA,
|
||||
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *B, OPENBLAS_CONST blasint ldb);
|
||||
void cblas_ctrsm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA,
|
||||
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *B, OPENBLAS_CONST blasint ldb);
|
||||
void cblas_ztrsm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA,
|
||||
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *B, OPENBLAS_CONST blasint ldb);
|
||||
|
||||
void cblas_chemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_zhemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc);
|
||||
|
||||
void cblas_cherk(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST float alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float beta, void *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_zherk(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST double alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double beta, void *C, OPENBLAS_CONST blasint ldc);
|
||||
|
||||
void cblas_cher2k(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, void *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_zher2k(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double beta, void *C, OPENBLAS_CONST blasint ldc);
|
||||
|
||||
void cblas_xerbla(blasint p, char *rout, char *form, ...);
|
||||
|
||||
/*** BLAS extensions ***/
|
||||
|
||||
void cblas_saxpby(OPENBLAS_CONST blasint n, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx,OPENBLAS_CONST float beta, float *y, OPENBLAS_CONST blasint incy);
|
||||
|
||||
void cblas_daxpby(OPENBLAS_CONST blasint n, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx,OPENBLAS_CONST double beta, double *y, OPENBLAS_CONST blasint incy);
|
||||
|
||||
void cblas_caxpby(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx,OPENBLAS_CONST void *beta, void *y, OPENBLAS_CONST blasint incy);
|
||||
|
||||
void cblas_zaxpby(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx,OPENBLAS_CONST void *beta, void *y, OPENBLAS_CONST blasint incy);
|
||||
|
||||
void cblas_somatcopy(OPENBLAS_CONST enum CBLAS_ORDER CORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE CTRANS, OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST float calpha, OPENBLAS_CONST float *a,
|
||||
OPENBLAS_CONST blasint clda, float *b, OPENBLAS_CONST blasint cldb);
|
||||
void cblas_domatcopy(OPENBLAS_CONST enum CBLAS_ORDER CORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE CTRANS, OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST double calpha, OPENBLAS_CONST double *a,
|
||||
OPENBLAS_CONST blasint clda, double *b, OPENBLAS_CONST blasint cldb);
|
||||
void cblas_comatcopy(OPENBLAS_CONST enum CBLAS_ORDER CORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE CTRANS, OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST float* calpha, OPENBLAS_CONST float* a,
|
||||
OPENBLAS_CONST blasint clda, float*b, OPENBLAS_CONST blasint cldb);
|
||||
void cblas_zomatcopy(OPENBLAS_CONST enum CBLAS_ORDER CORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE CTRANS, OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST double* calpha, OPENBLAS_CONST double* a,
|
||||
OPENBLAS_CONST blasint clda, double *b, OPENBLAS_CONST blasint cldb);
|
||||
|
||||
void cblas_simatcopy(OPENBLAS_CONST enum CBLAS_ORDER CORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE CTRANS, OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST float calpha, float *a,
|
||||
OPENBLAS_CONST blasint clda, OPENBLAS_CONST blasint cldb);
|
||||
void cblas_dimatcopy(OPENBLAS_CONST enum CBLAS_ORDER CORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE CTRANS, OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST double calpha, double *a,
|
||||
OPENBLAS_CONST blasint clda, OPENBLAS_CONST blasint cldb);
|
||||
void cblas_cimatcopy(OPENBLAS_CONST enum CBLAS_ORDER CORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE CTRANS, OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST float* calpha, float* a,
|
||||
OPENBLAS_CONST blasint clda, OPENBLAS_CONST blasint cldb);
|
||||
void cblas_zimatcopy(OPENBLAS_CONST enum CBLAS_ORDER CORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE CTRANS, OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST double* calpha, double* a,
|
||||
OPENBLAS_CONST blasint clda, OPENBLAS_CONST blasint cldb);
|
||||
|
||||
void cblas_sgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST float calpha, float *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST float cbeta,
|
||||
float *c, OPENBLAS_CONST blasint cldc);
|
||||
void cblas_dgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST double calpha, double *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST double cbeta,
|
||||
double *c, OPENBLAS_CONST blasint cldc);
|
||||
void cblas_cgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST float *calpha, float *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST float *cbeta,
|
||||
float *c, OPENBLAS_CONST blasint cldc);
|
||||
void cblas_zgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST double *calpha, double *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST double *cbeta,
|
||||
double *c, OPENBLAS_CONST blasint cldc);
|
||||
|
||||
/*** BFLOAT16 and INT8 extensions ***/
|
||||
/* convert float array to BFLOAT16 array by rounding */
|
||||
void cblas_sbstobf16(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *in, OPENBLAS_CONST blasint incin, bfloat16 *out, OPENBLAS_CONST blasint incout);
|
||||
/* convert double array to BFLOAT16 array by rounding */
|
||||
void cblas_sbdtobf16(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *in, OPENBLAS_CONST blasint incin, bfloat16 *out, OPENBLAS_CONST blasint incout);
|
||||
/* convert BFLOAT16 array to float array */
|
||||
void cblas_sbf16tos(OPENBLAS_CONST blasint n, OPENBLAS_CONST bfloat16 *in, OPENBLAS_CONST blasint incin, float *out, OPENBLAS_CONST blasint incout);
|
||||
/* convert BFLOAT16 array to double array */
|
||||
void cblas_dbf16tod(OPENBLAS_CONST blasint n, OPENBLAS_CONST bfloat16 *in, OPENBLAS_CONST blasint incin, double *out, OPENBLAS_CONST blasint incout);
|
||||
/* dot production of BFLOAT16 input arrays, and output as float */
|
||||
float cblas_sbdot(OPENBLAS_CONST blasint n, OPENBLAS_CONST bfloat16 *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST bfloat16 *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_sbgemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE trans, OPENBLAS_CONST blasint m, OPENBLAS_CONST blasint n, OPENBLAS_CONST float alpha, OPENBLAS_CONST bfloat16 *a, OPENBLAS_CONST blasint lda, OPENBLAS_CONST bfloat16 *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float beta, float *y, OPENBLAS_CONST blasint incy);
|
||||
|
||||
void cblas_sbgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST float alpha, OPENBLAS_CONST bfloat16 *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST bfloat16 *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif /* __cplusplus */
|
||||
|
||||
#endif
|
||||
|
|
@ -1,792 +0,0 @@
|
|||
|
||||
// =================================================================================================
|
||||
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
|
||||
// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
|
||||
// width of 100 characters per line.
|
||||
//
|
||||
// Author(s):
|
||||
// Cedric Nugteren <www.cedricnugteren.nl>
|
||||
//
|
||||
// This file contains the interface to the CLBlast BLAS routines. It also contains the definitions
|
||||
// of the returned status codes and the layout and transpose types. This is the only header users
|
||||
// of CLBlast should include and use.
|
||||
//
|
||||
// =================================================================================================
|
||||
|
||||
#ifndef CLBLAST_CLBLAST_H_
|
||||
#define CLBLAST_CLBLAST_H_
|
||||
|
||||
#include <cstdlib> // For size_t
|
||||
#include <string> // For OverrideParameters function
|
||||
#include <unordered_map> // For OverrideParameters function
|
||||
|
||||
// Includes the normal OpenCL C header
|
||||
#if defined(__APPLE__) || defined(__MACOSX)
|
||||
#include <OpenCL/opencl.h>
|
||||
#else
|
||||
#include <CL/opencl.h>
|
||||
#endif
|
||||
|
||||
// Exports library functions under Windows when building a DLL. See also:
|
||||
// https://msdn.microsoft.com/en-us/library/a90k134d.aspx
|
||||
#if defined(_WIN32) && defined(CLBLAST_DLL)
|
||||
#if defined(COMPILING_DLL)
|
||||
#define PUBLIC_API __declspec(dllexport)
|
||||
#else
|
||||
#define PUBLIC_API __declspec(dllimport)
|
||||
#endif
|
||||
#else
|
||||
#define PUBLIC_API
|
||||
#endif
|
||||
|
||||
// Version numbering (v1.6.0)
|
||||
#define CLBLAST_VERSION_MAJOR 1
|
||||
#define CLBLAST_VERSION_MINOR 6
|
||||
#define CLBLAST_VERSION_PATCH 0
|
||||
|
||||
namespace clblast {
|
||||
// =================================================================================================
|
||||
|
||||
// Status codes. These codes can be returned by functions declared in this header file. The error
|
||||
// codes match either the standard OpenCL error codes or the clBLAS error codes.
|
||||
enum class StatusCode {
|
||||
|
||||
// Status codes in common with the OpenCL standard
|
||||
kSuccess = 0, // CL_SUCCESS
|
||||
kOpenCLCompilerNotAvailable= -3, // CL_COMPILER_NOT_AVAILABLE
|
||||
kTempBufferAllocFailure = -4, // CL_MEM_OBJECT_ALLOCATION_FAILURE
|
||||
kOpenCLOutOfResources = -5, // CL_OUT_OF_RESOURCES
|
||||
kOpenCLOutOfHostMemory = -6, // CL_OUT_OF_HOST_MEMORY
|
||||
kOpenCLBuildProgramFailure = -11, // CL_BUILD_PROGRAM_FAILURE: OpenCL compilation error
|
||||
kInvalidValue = -30, // CL_INVALID_VALUE
|
||||
kInvalidCommandQueue = -36, // CL_INVALID_COMMAND_QUEUE
|
||||
kInvalidMemObject = -38, // CL_INVALID_MEM_OBJECT
|
||||
kInvalidBinary = -42, // CL_INVALID_BINARY
|
||||
kInvalidBuildOptions = -43, // CL_INVALID_BUILD_OPTIONS
|
||||
kInvalidProgram = -44, // CL_INVALID_PROGRAM
|
||||
kInvalidProgramExecutable = -45, // CL_INVALID_PROGRAM_EXECUTABLE
|
||||
kInvalidKernelName = -46, // CL_INVALID_KERNEL_NAME
|
||||
kInvalidKernelDefinition = -47, // CL_INVALID_KERNEL_DEFINITION
|
||||
kInvalidKernel = -48, // CL_INVALID_KERNEL
|
||||
kInvalidArgIndex = -49, // CL_INVALID_ARG_INDEX
|
||||
kInvalidArgValue = -50, // CL_INVALID_ARG_VALUE
|
||||
kInvalidArgSize = -51, // CL_INVALID_ARG_SIZE
|
||||
kInvalidKernelArgs = -52, // CL_INVALID_KERNEL_ARGS
|
||||
kInvalidLocalNumDimensions = -53, // CL_INVALID_WORK_DIMENSION: Too many thread dimensions
|
||||
kInvalidLocalThreadsTotal = -54, // CL_INVALID_WORK_GROUP_SIZE: Too many threads in total
|
||||
kInvalidLocalThreadsDim = -55, // CL_INVALID_WORK_ITEM_SIZE: ... or for a specific dimension
|
||||
kInvalidGlobalOffset = -56, // CL_INVALID_GLOBAL_OFFSET
|
||||
kInvalidEventWaitList = -57, // CL_INVALID_EVENT_WAIT_LIST
|
||||
kInvalidEvent = -58, // CL_INVALID_EVENT
|
||||
kInvalidOperation = -59, // CL_INVALID_OPERATION
|
||||
kInvalidBufferSize = -61, // CL_INVALID_BUFFER_SIZE
|
||||
kInvalidGlobalWorkSize = -63, // CL_INVALID_GLOBAL_WORK_SIZE
|
||||
|
||||
// Status codes in common with the clBLAS library
|
||||
kNotImplemented = -1024, // Routine or functionality not implemented yet
|
||||
kInvalidMatrixA = -1022, // Matrix A is not a valid OpenCL buffer
|
||||
kInvalidMatrixB = -1021, // Matrix B is not a valid OpenCL buffer
|
||||
kInvalidMatrixC = -1020, // Matrix C is not a valid OpenCL buffer
|
||||
kInvalidVectorX = -1019, // Vector X is not a valid OpenCL buffer
|
||||
kInvalidVectorY = -1018, // Vector Y is not a valid OpenCL buffer
|
||||
kInvalidDimension = -1017, // Dimensions M, N, and K have to be larger than zero
|
||||
kInvalidLeadDimA = -1016, // LD of A is smaller than the matrix's first dimension
|
||||
kInvalidLeadDimB = -1015, // LD of B is smaller than the matrix's first dimension
|
||||
kInvalidLeadDimC = -1014, // LD of C is smaller than the matrix's first dimension
|
||||
kInvalidIncrementX = -1013, // Increment of vector X cannot be zero
|
||||
kInvalidIncrementY = -1012, // Increment of vector Y cannot be zero
|
||||
kInsufficientMemoryA = -1011, // Matrix A's OpenCL buffer is too small
|
||||
kInsufficientMemoryB = -1010, // Matrix B's OpenCL buffer is too small
|
||||
kInsufficientMemoryC = -1009, // Matrix C's OpenCL buffer is too small
|
||||
kInsufficientMemoryX = -1008, // Vector X's OpenCL buffer is too small
|
||||
kInsufficientMemoryY = -1007, // Vector Y's OpenCL buffer is too small
|
||||
|
||||
// Custom additional status codes for CLBlast
|
||||
kInsufficientMemoryTemp = -2050, // Temporary buffer provided to GEMM routine is too small
|
||||
kInvalidBatchCount = -2049, // The batch count needs to be positive
|
||||
kInvalidOverrideKernel = -2048, // Trying to override parameters for an invalid kernel
|
||||
kMissingOverrideParameter = -2047, // Missing override parameter(s) for the target kernel
|
||||
kInvalidLocalMemUsage = -2046, // Not enough local memory available on this device
|
||||
kNoHalfPrecision = -2045, // Half precision (16-bits) not supported by the device
|
||||
kNoDoublePrecision = -2044, // Double precision (64-bits) not supported by the device
|
||||
kInvalidVectorScalar = -2043, // The unit-sized vector is not a valid OpenCL buffer
|
||||
kInsufficientMemoryScalar = -2042, // The unit-sized vector's OpenCL buffer is too small
|
||||
kDatabaseError = -2041, // Entry for the device was not found in the database
|
||||
kUnknownError = -2040, // A catch-all error code representing an unspecified error
|
||||
kUnexpectedError = -2039, // A catch-all error code representing an unexpected exception
|
||||
};
|
||||
|
||||
// Matrix layout and transpose types
|
||||
enum class Layout { kRowMajor = 101, kColMajor = 102 };
|
||||
enum class Transpose { kNo = 111, kYes = 112, kConjugate = 113 };
|
||||
enum class Triangle { kUpper = 121, kLower = 122 };
|
||||
enum class Diagonal { kNonUnit = 131, kUnit = 132 };
|
||||
enum class Side { kLeft = 141, kRight = 142 };
|
||||
enum class KernelMode { kCrossCorrelation = 151, kConvolution = 152 };
|
||||
|
||||
// Precision scoped enum (values in bits)
|
||||
enum class Precision { kHalf = 16, kSingle = 32, kDouble = 64,
|
||||
kComplexSingle = 3232, kComplexDouble = 6464, kAny = -1 };
|
||||
|
||||
// =================================================================================================
|
||||
// BLAS level-1 (vector-vector) routines
|
||||
// =================================================================================================
|
||||
|
||||
// Generate givens plane rotation: SROTG/DROTG
|
||||
template <typename T>
|
||||
StatusCode Rotg(cl_mem sa_buffer, const size_t sa_offset,
|
||||
cl_mem sb_buffer, const size_t sb_offset,
|
||||
cl_mem sc_buffer, const size_t sc_offset,
|
||||
cl_mem ss_buffer, const size_t ss_offset,
|
||||
cl_command_queue* queue, cl_event* event = nullptr);
|
||||
|
||||
// Generate modified givens plane rotation: SROTMG/DROTMG
|
||||
template <typename T>
|
||||
StatusCode Rotmg(cl_mem sd1_buffer, const size_t sd1_offset,
|
||||
cl_mem sd2_buffer, const size_t sd2_offset,
|
||||
cl_mem sx1_buffer, const size_t sx1_offset,
|
||||
const cl_mem sy1_buffer, const size_t sy1_offset,
|
||||
cl_mem sparam_buffer, const size_t sparam_offset,
|
||||
cl_command_queue* queue, cl_event* event = nullptr);
|
||||
|
||||
// Apply givens plane rotation: SROT/DROT
|
||||
template <typename T>
|
||||
StatusCode Rot(const size_t n,
|
||||
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
|
||||
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
|
||||
const T cos,
|
||||
const T sin,
|
||||
cl_command_queue* queue, cl_event* event = nullptr);
|
||||
|
||||
// Apply modified givens plane rotation: SROTM/DROTM
|
||||
template <typename T>
|
||||
StatusCode Rotm(const size_t n,
|
||||
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
|
||||
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
|
||||
cl_mem sparam_buffer, const size_t sparam_offset,
|
||||
cl_command_queue* queue, cl_event* event = nullptr);
|
||||
|
||||
// Swap two vectors: SSWAP/DSWAP/CSWAP/ZSWAP/HSWAP
|
||||
template <typename T>
|
||||
StatusCode Swap(const size_t n,
|
||||
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
|
||||
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
|
||||
cl_command_queue* queue, cl_event* event = nullptr);
|
||||
|
||||
// Vector scaling: SSCAL/DSCAL/CSCAL/ZSCAL/HSCAL
|
||||
template <typename T>
|
||||
StatusCode Scal(const size_t n,
|
||||
const T alpha,
|
||||
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
|
||||
cl_command_queue* queue, cl_event* event = nullptr);
|
||||
|
||||
// Vector copy: SCOPY/DCOPY/CCOPY/ZCOPY/HCOPY
|
||||
template <typename T>
|
||||
StatusCode Copy(const size_t n,
|
||||
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
|
||||
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
|
||||
cl_command_queue* queue, cl_event* event = nullptr);
|
||||
|
||||
// Vector-times-constant plus vector: SAXPY/DAXPY/CAXPY/ZAXPY/HAXPY
|
||||
template <typename T>
|
||||
StatusCode Axpy(const size_t n,
|
||||
const T alpha,
|
||||
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
|
||||
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
|
||||
cl_command_queue* queue, cl_event* event = nullptr);
|
||||
|
||||
// Dot product of two vectors: SDOT/DDOT/HDOT
|
||||
template <typename T>
|
||||
StatusCode Dot(const size_t n,
|
||||
cl_mem dot_buffer, const size_t dot_offset,
|
||||
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
|
||||
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
|
||||
cl_command_queue* queue, cl_event* event = nullptr);
|
||||
|
||||
// Dot product of two complex vectors: CDOTU/ZDOTU
|
||||
template <typename T>
|
||||
StatusCode Dotu(const size_t n,
|
||||
cl_mem dot_buffer, const size_t dot_offset,
|
||||
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
|
||||
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
|
||||
cl_command_queue* queue, cl_event* event = nullptr);
|
||||
|
||||
// Dot product of two complex vectors, one conjugated: CDOTC/ZDOTC
|
||||
template <typename T>
|
||||
StatusCode Dotc(const size_t n,
|
||||
cl_mem dot_buffer, const size_t dot_offset,
|
||||
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
|
||||
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
|
||||
cl_command_queue* queue, cl_event* event = nullptr);
|
||||
|
||||
// Euclidian norm of a vector: SNRM2/DNRM2/ScNRM2/DzNRM2/HNRM2
|
||||
template <typename T>
|
||||
StatusCode Nrm2(const size_t n,
|
||||
cl_mem nrm2_buffer, const size_t nrm2_offset,
|
||||
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
|
||||
cl_command_queue* queue, cl_event* event = nullptr);
|
||||
|
||||
// Absolute sum of values in a vector: SASUM/DASUM/ScASUM/DzASUM/HASUM
|
||||
template <typename T>
|
||||
StatusCode Asum(const size_t n,
|
||||
cl_mem asum_buffer, const size_t asum_offset,
|
||||
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
|
||||
cl_command_queue* queue, cl_event* event = nullptr);
|
||||
|
||||
// Sum of values in a vector (non-BLAS function): SSUM/DSUM/ScSUM/DzSUM/HSUM
|
||||
template <typename T>
|
||||
StatusCode Sum(const size_t n,
|
||||
cl_mem sum_buffer, const size_t sum_offset,
|
||||
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
|
||||
cl_command_queue* queue, cl_event* event = nullptr);
|
||||
|
||||
// Index of absolute maximum value in a vector: iSAMAX/iDAMAX/iCAMAX/iZAMAX/iHAMAX
|
||||
template <typename T>
|
||||
StatusCode Amax(const size_t n,
|
||||
cl_mem imax_buffer, const size_t imax_offset,
|
||||
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
|
||||
cl_command_queue* queue, cl_event* event = nullptr);
|
||||
|
||||
// Index of absolute minimum value in a vector (non-BLAS function): iSAMIN/iDAMIN/iCAMIN/iZAMIN/iHAMIN
|
||||
template <typename T>
|
||||
StatusCode Amin(const size_t n,
|
||||
cl_mem imin_buffer, const size_t imin_offset,
|
||||
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
|
||||
cl_command_queue* queue, cl_event* event = nullptr);
|
||||
|
||||
// Index of maximum value in a vector (non-BLAS function): iSMAX/iDMAX/iCMAX/iZMAX/iHMAX
|
||||
template <typename T>
|
||||
StatusCode Max(const size_t n,
|
||||
cl_mem imax_buffer, const size_t imax_offset,
|
||||
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
|
||||
cl_command_queue* queue, cl_event* event = nullptr);
|
||||
|
||||
// Index of minimum value in a vector (non-BLAS function): iSMIN/iDMIN/iCMIN/iZMIN/iHMIN
|
||||
template <typename T>
|
||||
StatusCode Min(const size_t n,
|
||||
cl_mem imin_buffer, const size_t imin_offset,
|
||||
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
|
||||
cl_command_queue* queue, cl_event* event = nullptr);
|
||||
|
||||
// =================================================================================================
|
||||
// BLAS level-2 (matrix-vector) routines
|
||||
// =================================================================================================
|
||||
|
||||
// General matrix-vector multiplication: SGEMV/DGEMV/CGEMV/ZGEMV/HGEMV
|
||||
template <typename T>
|
||||
StatusCode Gemv(const Layout layout, const Transpose a_transpose,
|
||||
const size_t m, const size_t n,
|
||||
const T alpha,
|
||||
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
|
||||
const T beta,
|
||||
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
|
||||
cl_command_queue* queue, cl_event* event = nullptr);
|
||||
|
||||
// General banded matrix-vector multiplication: SGBMV/DGBMV/CGBMV/ZGBMV/HGBMV
|
||||
template <typename T>
|
||||
StatusCode Gbmv(const Layout layout, const Transpose a_transpose,
|
||||
const size_t m, const size_t n, const size_t kl, const size_t ku,
|
||||
const T alpha,
|
||||
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
|
||||
const T beta,
|
||||
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
|
||||
cl_command_queue* queue, cl_event* event = nullptr);
|
||||
|
||||
// Hermitian matrix-vector multiplication: CHEMV/ZHEMV
|
||||
template <typename T>
|
||||
StatusCode Hemv(const Layout layout, const Triangle triangle,
|
||||
const size_t n,
|
||||
const T alpha,
|
||||
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
|
||||
const T beta,
|
||||
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
|
||||
cl_command_queue* queue, cl_event* event = nullptr);
|
||||
|
||||
// Hermitian banded matrix-vector multiplication: CHBMV/ZHBMV
|
||||
template <typename T>
|
||||
StatusCode Hbmv(const Layout layout, const Triangle triangle,
|
||||
const size_t n, const size_t k,
|
||||
const T alpha,
|
||||
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
|
||||
const T beta,
|
||||
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
|
||||
cl_command_queue* queue, cl_event* event = nullptr);
|
||||
|
||||
// Hermitian packed matrix-vector multiplication: CHPMV/ZHPMV
|
||||
template <typename T>
|
||||
StatusCode Hpmv(const Layout layout, const Triangle triangle,
|
||||
const size_t n,
|
||||
const T alpha,
|
||||
const cl_mem ap_buffer, const size_t ap_offset,
|
||||
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
|
||||
const T beta,
|
||||
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
|
||||
cl_command_queue* queue, cl_event* event = nullptr);
|
||||
|
||||
// Symmetric matrix-vector multiplication: SSYMV/DSYMV/HSYMV
|
||||
template <typename T>
|
||||
StatusCode Symv(const Layout layout, const Triangle triangle,
|
||||
const size_t n,
|
||||
const T alpha,
|
||||
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
|
||||
const T beta,
|
||||
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
|
||||
cl_command_queue* queue, cl_event* event = nullptr);
|
||||
|
||||
// Symmetric banded matrix-vector multiplication: SSBMV/DSBMV/HSBMV
|
||||
template <typename T>
|
||||
StatusCode Sbmv(const Layout layout, const Triangle triangle,
|
||||
const size_t n, const size_t k,
|
||||
const T alpha,
|
||||
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
|
||||
const T beta,
|
||||
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
|
||||
cl_command_queue* queue, cl_event* event = nullptr);
|
||||
|
||||
// Symmetric packed matrix-vector multiplication: SSPMV/DSPMV/HSPMV
|
||||
template <typename T>
|
||||
StatusCode Spmv(const Layout layout, const Triangle triangle,
|
||||
const size_t n,
|
||||
const T alpha,
|
||||
const cl_mem ap_buffer, const size_t ap_offset,
|
||||
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
|
||||
const T beta,
|
||||
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
|
||||
cl_command_queue* queue, cl_event* event = nullptr);
|
||||
|
||||
// Triangular matrix-vector multiplication: STRMV/DTRMV/CTRMV/ZTRMV/HTRMV
|
||||
template <typename T>
|
||||
StatusCode Trmv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal,
|
||||
const size_t n,
|
||||
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
|
||||
cl_command_queue* queue, cl_event* event = nullptr);
|
||||
|
||||
// Triangular banded matrix-vector multiplication: STBMV/DTBMV/CTBMV/ZTBMV/HTBMV
|
||||
template <typename T>
|
||||
StatusCode Tbmv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal,
|
||||
const size_t n, const size_t k,
|
||||
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
|
||||
cl_command_queue* queue, cl_event* event = nullptr);
|
||||
|
||||
// Triangular packed matrix-vector multiplication: STPMV/DTPMV/CTPMV/ZTPMV/HTPMV
|
||||
template <typename T>
|
||||
StatusCode Tpmv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal,
|
||||
const size_t n,
|
||||
const cl_mem ap_buffer, const size_t ap_offset,
|
||||
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
|
||||
cl_command_queue* queue, cl_event* event = nullptr);
|
||||
|
||||
// Solves a triangular system of equations: STRSV/DTRSV/CTRSV/ZTRSV
|
||||
template <typename T>
|
||||
StatusCode Trsv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal,
|
||||
const size_t n,
|
||||
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
|
||||
cl_command_queue* queue, cl_event* event = nullptr);
|
||||
|
||||
// Solves a banded triangular system of equations: STBSV/DTBSV/CTBSV/ZTBSV
|
||||
template <typename T>
|
||||
StatusCode Tbsv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal,
|
||||
const size_t n, const size_t k,
|
||||
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
|
||||
cl_command_queue* queue, cl_event* event = nullptr);
|
||||
|
||||
// Solves a packed triangular system of equations: STPSV/DTPSV/CTPSV/ZTPSV
|
||||
template <typename T>
|
||||
StatusCode Tpsv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal,
|
||||
const size_t n,
|
||||
const cl_mem ap_buffer, const size_t ap_offset,
|
||||
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
|
||||
cl_command_queue* queue, cl_event* event = nullptr);
|
||||
|
||||
// General rank-1 matrix update: SGER/DGER/HGER
|
||||
template <typename T>
|
||||
StatusCode Ger(const Layout layout,
|
||||
const size_t m, const size_t n,
|
||||
const T alpha,
|
||||
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
|
||||
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
|
||||
cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
cl_command_queue* queue, cl_event* event = nullptr);
|
||||
|
||||
// General rank-1 complex matrix update: CGERU/ZGERU
|
||||
template <typename T>
|
||||
StatusCode Geru(const Layout layout,
|
||||
const size_t m, const size_t n,
|
||||
const T alpha,
|
||||
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
|
||||
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
|
||||
cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
cl_command_queue* queue, cl_event* event = nullptr);
|
||||
|
||||
// General rank-1 complex conjugated matrix update: CGERC/ZGERC
|
||||
template <typename T>
|
||||
StatusCode Gerc(const Layout layout,
|
||||
const size_t m, const size_t n,
|
||||
const T alpha,
|
||||
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
|
||||
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
|
||||
cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
cl_command_queue* queue, cl_event* event = nullptr);
|
||||
|
||||
// Hermitian rank-1 matrix update: CHER/ZHER
|
||||
template <typename T>
|
||||
StatusCode Her(const Layout layout, const Triangle triangle,
|
||||
const size_t n,
|
||||
const T alpha,
|
||||
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
|
||||
cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
cl_command_queue* queue, cl_event* event = nullptr);
|
||||
|
||||
// Hermitian packed rank-1 matrix update: CHPR/ZHPR
|
||||
template <typename T>
|
||||
StatusCode Hpr(const Layout layout, const Triangle triangle,
|
||||
const size_t n,
|
||||
const T alpha,
|
||||
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
|
||||
cl_mem ap_buffer, const size_t ap_offset,
|
||||
cl_command_queue* queue, cl_event* event = nullptr);
|
||||
|
||||
// Hermitian rank-2 matrix update: CHER2/ZHER2
|
||||
template <typename T>
|
||||
StatusCode Her2(const Layout layout, const Triangle triangle,
|
||||
const size_t n,
|
||||
const T alpha,
|
||||
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
|
||||
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
|
||||
cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
cl_command_queue* queue, cl_event* event = nullptr);
|
||||
|
||||
// Hermitian packed rank-2 matrix update: CHPR2/ZHPR2
|
||||
template <typename T>
|
||||
StatusCode Hpr2(const Layout layout, const Triangle triangle,
|
||||
const size_t n,
|
||||
const T alpha,
|
||||
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
|
||||
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
|
||||
cl_mem ap_buffer, const size_t ap_offset,
|
||||
cl_command_queue* queue, cl_event* event = nullptr);
|
||||
|
||||
// Symmetric rank-1 matrix update: SSYR/DSYR/HSYR
|
||||
template <typename T>
|
||||
StatusCode Syr(const Layout layout, const Triangle triangle,
|
||||
const size_t n,
|
||||
const T alpha,
|
||||
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
|
||||
cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
cl_command_queue* queue, cl_event* event = nullptr);
|
||||
|
||||
// Symmetric packed rank-1 matrix update: SSPR/DSPR/HSPR
|
||||
template <typename T>
|
||||
StatusCode Spr(const Layout layout, const Triangle triangle,
|
||||
const size_t n,
|
||||
const T alpha,
|
||||
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
|
||||
cl_mem ap_buffer, const size_t ap_offset,
|
||||
cl_command_queue* queue, cl_event* event = nullptr);
|
||||
|
||||
// Symmetric rank-2 matrix update: SSYR2/DSYR2/HSYR2
|
||||
template <typename T>
|
||||
StatusCode Syr2(const Layout layout, const Triangle triangle,
|
||||
const size_t n,
|
||||
const T alpha,
|
||||
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
|
||||
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
|
||||
cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
cl_command_queue* queue, cl_event* event = nullptr);
|
||||
|
||||
// Symmetric packed rank-2 matrix update: SSPR2/DSPR2/HSPR2
|
||||
template <typename T>
|
||||
StatusCode Spr2(const Layout layout, const Triangle triangle,
|
||||
const size_t n,
|
||||
const T alpha,
|
||||
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
|
||||
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
|
||||
cl_mem ap_buffer, const size_t ap_offset,
|
||||
cl_command_queue* queue, cl_event* event = nullptr);
|
||||
|
||||
// =================================================================================================
|
||||
// BLAS level-3 (matrix-matrix) routines
|
||||
// =================================================================================================
|
||||
|
||||
// General matrix-matrix multiplication: SGEMM/DGEMM/CGEMM/ZGEMM/HGEMM
|
||||
template <typename T>
|
||||
StatusCode Gemm(const Layout layout, const Transpose a_transpose, const Transpose b_transpose,
|
||||
const size_t m, const size_t n, const size_t k,
|
||||
const T alpha,
|
||||
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
|
||||
const T beta,
|
||||
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
|
||||
cl_command_queue* queue, cl_event* event = nullptr,
|
||||
cl_mem temp_buffer = nullptr);
|
||||
|
||||
// Symmetric matrix-matrix multiplication: SSYMM/DSYMM/CSYMM/ZSYMM/HSYMM
|
||||
template <typename T>
|
||||
StatusCode Symm(const Layout layout, const Side side, const Triangle triangle,
|
||||
const size_t m, const size_t n,
|
||||
const T alpha,
|
||||
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
|
||||
const T beta,
|
||||
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
|
||||
cl_command_queue* queue, cl_event* event = nullptr);
|
||||
|
||||
// Hermitian matrix-matrix multiplication: CHEMM/ZHEMM
|
||||
template <typename T>
|
||||
StatusCode Hemm(const Layout layout, const Side side, const Triangle triangle,
|
||||
const size_t m, const size_t n,
|
||||
const T alpha,
|
||||
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
|
||||
const T beta,
|
||||
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
|
||||
cl_command_queue* queue, cl_event* event = nullptr);
|
||||
|
||||
// Rank-K update of a symmetric matrix: SSYRK/DSYRK/CSYRK/ZSYRK/HSYRK
|
||||
template <typename T>
|
||||
StatusCode Syrk(const Layout layout, const Triangle triangle, const Transpose a_transpose,
|
||||
const size_t n, const size_t k,
|
||||
const T alpha,
|
||||
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
const T beta,
|
||||
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
|
||||
cl_command_queue* queue, cl_event* event = nullptr);
|
||||
|
||||
// Rank-K update of a hermitian matrix: CHERK/ZHERK
|
||||
template <typename T>
|
||||
StatusCode Herk(const Layout layout, const Triangle triangle, const Transpose a_transpose,
|
||||
const size_t n, const size_t k,
|
||||
const T alpha,
|
||||
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
const T beta,
|
||||
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
|
||||
cl_command_queue* queue, cl_event* event = nullptr);
|
||||
|
||||
// Rank-2K update of a symmetric matrix: SSYR2K/DSYR2K/CSYR2K/ZSYR2K/HSYR2K
|
||||
template <typename T>
|
||||
StatusCode Syr2k(const Layout layout, const Triangle triangle, const Transpose ab_transpose,
|
||||
const size_t n, const size_t k,
|
||||
const T alpha,
|
||||
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
|
||||
const T beta,
|
||||
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
|
||||
cl_command_queue* queue, cl_event* event = nullptr);
|
||||
|
||||
// Rank-2K update of a hermitian matrix: CHER2K/ZHER2K
|
||||
template <typename T, typename U>
|
||||
StatusCode Her2k(const Layout layout, const Triangle triangle, const Transpose ab_transpose,
|
||||
const size_t n, const size_t k,
|
||||
const T alpha,
|
||||
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
|
||||
const U beta,
|
||||
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
|
||||
cl_command_queue* queue, cl_event* event = nullptr);
|
||||
|
||||
// Triangular matrix-matrix multiplication: STRMM/DTRMM/CTRMM/ZTRMM/HTRMM
|
||||
template <typename T>
|
||||
StatusCode Trmm(const Layout layout, const Side side, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal,
|
||||
const size_t m, const size_t n,
|
||||
const T alpha,
|
||||
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
|
||||
cl_command_queue* queue, cl_event* event = nullptr);
|
||||
|
||||
// Solves a triangular system of equations: STRSM/DTRSM/CTRSM/ZTRSM
|
||||
template <typename T>
|
||||
StatusCode Trsm(const Layout layout, const Side side, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal,
|
||||
const size_t m, const size_t n,
|
||||
const T alpha,
|
||||
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
|
||||
cl_command_queue* queue, cl_event* event = nullptr);
|
||||
|
||||
// =================================================================================================
|
||||
// Extra non-BLAS routines (level-X)
|
||||
// =================================================================================================
|
||||
|
||||
// Element-wise vector product (Hadamard): SHAD/DHAD/CHAD/ZHAD/HHAD
|
||||
template <typename T>
|
||||
StatusCode Had(const size_t n,
|
||||
const T alpha,
|
||||
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
|
||||
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
|
||||
const T beta,
|
||||
cl_mem z_buffer, const size_t z_offset, const size_t z_inc,
|
||||
cl_command_queue* queue, cl_event* event = nullptr);
|
||||
|
||||
// Scaling and out-place transpose/copy (non-BLAS function): SOMATCOPY/DOMATCOPY/COMATCOPY/ZOMATCOPY/HOMATCOPY
|
||||
template <typename T>
|
||||
StatusCode Omatcopy(const Layout layout, const Transpose a_transpose,
|
||||
const size_t m, const size_t n,
|
||||
const T alpha,
|
||||
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
|
||||
cl_command_queue* queue, cl_event* event = nullptr);
|
||||
|
||||
// Im2col function (non-BLAS function): SIM2COL/DIM2COL/CIM2COL/ZIM2COL/HIM2COL
|
||||
template <typename T>
|
||||
StatusCode Im2col(const KernelMode kernel_mode,
|
||||
const size_t channels, const size_t height, const size_t width, const size_t kernel_h, const size_t kernel_w, const size_t pad_h, const size_t pad_w, const size_t stride_h, const size_t stride_w, const size_t dilation_h, const size_t dilation_w,
|
||||
const cl_mem im_buffer, const size_t im_offset,
|
||||
cl_mem col_buffer, const size_t col_offset,
|
||||
cl_command_queue* queue, cl_event* event = nullptr);
|
||||
|
||||
// Col2im function (non-BLAS function): SCOL2IM/DCOL2IM/CCOL2IM/ZCOL2IM/HCOL2IM
|
||||
template <typename T>
|
||||
StatusCode Col2im(const KernelMode kernel_mode,
|
||||
const size_t channels, const size_t height, const size_t width, const size_t kernel_h, const size_t kernel_w, const size_t pad_h, const size_t pad_w, const size_t stride_h, const size_t stride_w, const size_t dilation_h, const size_t dilation_w,
|
||||
const cl_mem col_buffer, const size_t col_offset,
|
||||
cl_mem im_buffer, const size_t im_offset,
|
||||
cl_command_queue* queue, cl_event* event = nullptr);
|
||||
|
||||
// Batched convolution as GEMM (non-BLAS function): SCONVGEMM/DCONVGEMM/HCONVGEMM
|
||||
template <typename T>
|
||||
StatusCode Convgemm(const KernelMode kernel_mode,
|
||||
const size_t channels, const size_t height, const size_t width, const size_t kernel_h, const size_t kernel_w, const size_t pad_h, const size_t pad_w, const size_t stride_h, const size_t stride_w, const size_t dilation_h, const size_t dilation_w, const size_t num_kernels, const size_t batch_count,
|
||||
const cl_mem im_buffer, const size_t im_offset,
|
||||
const cl_mem kernel_buffer, const size_t kernel_offset,
|
||||
cl_mem result_buffer, const size_t result_offset,
|
||||
cl_command_queue* queue, cl_event* event = nullptr);
|
||||
|
||||
// Batched version of AXPY: SAXPYBATCHED/DAXPYBATCHED/CAXPYBATCHED/ZAXPYBATCHED/HAXPYBATCHED
|
||||
template <typename T>
|
||||
StatusCode AxpyBatched(const size_t n,
|
||||
const T *alphas,
|
||||
const cl_mem x_buffer, const size_t *x_offsets, const size_t x_inc,
|
||||
cl_mem y_buffer, const size_t *y_offsets, const size_t y_inc,
|
||||
const size_t batch_count,
|
||||
cl_command_queue* queue, cl_event* event = nullptr);
|
||||
|
||||
// Batched version of GEMM: SGEMMBATCHED/DGEMMBATCHED/CGEMMBATCHED/ZGEMMBATCHED/HGEMMBATCHED
|
||||
template <typename T>
|
||||
StatusCode GemmBatched(const Layout layout, const Transpose a_transpose, const Transpose b_transpose,
|
||||
const size_t m, const size_t n, const size_t k,
|
||||
const T *alphas,
|
||||
const cl_mem a_buffer, const size_t *a_offsets, const size_t a_ld,
|
||||
const cl_mem b_buffer, const size_t *b_offsets, const size_t b_ld,
|
||||
const T *betas,
|
||||
cl_mem c_buffer, const size_t *c_offsets, const size_t c_ld,
|
||||
const size_t batch_count,
|
||||
cl_command_queue* queue, cl_event* event = nullptr);
|
||||
|
||||
// StridedBatched version of GEMM: SGEMMSTRIDEDBATCHED/DGEMMSTRIDEDBATCHED/CGEMMSTRIDEDBATCHED/ZGEMMSTRIDEDBATCHED/HGEMMSTRIDEDBATCHED
|
||||
template <typename T>
|
||||
StatusCode GemmStridedBatched(const Layout layout, const Transpose a_transpose, const Transpose b_transpose,
|
||||
const size_t m, const size_t n, const size_t k,
|
||||
const T alpha,
|
||||
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const size_t a_stride,
|
||||
const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const size_t b_stride,
|
||||
const T beta,
|
||||
cl_mem c_buffer, const size_t c_offset, const size_t c_ld, const size_t c_stride,
|
||||
const size_t batch_count,
|
||||
cl_command_queue* queue, cl_event* event = nullptr);
|
||||
|
||||
// =================================================================================================
|
||||
|
||||
// Retrieves the required size of the temporary buffer for the GEMM kernel (optional)
|
||||
template <typename T>
|
||||
StatusCode GemmTempBufferSize(const Layout layout, const Transpose a_transpose, const Transpose b_transpose,
|
||||
const size_t m, const size_t n, const size_t k,
|
||||
const size_t a_offset, const size_t a_ld,
|
||||
const size_t b_offset, const size_t b_ld,
|
||||
const size_t c_offset, const size_t c_ld,
|
||||
cl_command_queue* queue, size_t& temp_buffer_size);
|
||||
|
||||
// =================================================================================================
|
||||
|
||||
// CLBlast stores binaries of compiled kernels into a cache in case the same kernel is used later on
|
||||
// for the same device. This cache can be cleared to free up system memory or in case of debugging.
|
||||
StatusCode PUBLIC_API ClearCache();
|
||||
|
||||
// The cache can also be pre-initialized for a specific device with all possible CLBlast kernels.
|
||||
// Further CLBlast routine calls will then run at maximum speed.
|
||||
StatusCode PUBLIC_API FillCache(const cl_device_id device);
|
||||
|
||||
// =================================================================================================
|
||||
|
||||
// Retrieves current tuning parameters for a specific device-precision-kernel combination
|
||||
StatusCode PUBLIC_API RetrieveParameters(const cl_device_id device, const std::string &kernel_name,
|
||||
const Precision precision,
|
||||
std::unordered_map<std::string,size_t> ¶meters);
|
||||
|
||||
// Overrides tuning parameters for a specific device-precision-kernel combination. The next time
|
||||
// the target routine is called it will re-compile and use the new parameters from then on.
|
||||
StatusCode PUBLIC_API OverrideParameters(const cl_device_id device, const std::string &kernel_name,
|
||||
const Precision precision,
|
||||
const std::unordered_map<std::string,size_t> ¶meters);
|
||||
|
||||
// =================================================================================================
|
||||
|
||||
// Tunes the "Xaxpy" kernel, used for many level-1 routines such as XAXPY, XCOPY, and XSWAP
|
||||
template <typename T>
|
||||
StatusCode TuneXaxpy(cl_command_queue* queue, const size_t n,
|
||||
const double fraction, std::unordered_map<std::string,size_t> ¶meters);
|
||||
|
||||
// Tunes the "Xdot" kernel, used for level-1 reduction routines such as XDOT, XMAX, and XSUM
|
||||
template <typename T>
|
||||
StatusCode TuneXdot(cl_command_queue* queue, const size_t n,
|
||||
const double fraction, std::unordered_map<std::string,size_t> ¶meters);
|
||||
|
||||
// Tunes the "Xgemv" kernel, used for matrix-vector level-2 routines such as XGEMV, XGBMV, and XHEMV
|
||||
template <typename T>
|
||||
StatusCode TuneXgemv(cl_command_queue* queue, const size_t m, const size_t n,
|
||||
const double fraction, std::unordered_map<std::string,size_t> ¶meters);
|
||||
|
||||
// Tunes the "Xger" kernel, used for matrix update level-2 routines such as XGER, XHER, and XSYR2
|
||||
template <typename T>
|
||||
StatusCode TuneXger(cl_command_queue* queue, const size_t m, const size_t n,
|
||||
const double fraction, std::unordered_map<std::string,size_t> ¶meters);
|
||||
|
||||
// Tunes the "Xgemm" kernel, used for most level-3 routines such as XGEMM, XSYMM, and XHER2K
|
||||
template <typename T>
|
||||
StatusCode TuneXgemm(cl_command_queue* queue, const size_t m, const size_t n, const size_t k,
|
||||
const double fraction, std::unordered_map<std::string,size_t> ¶meters);
|
||||
|
||||
// Tunes the "XgemmDiret" kernel, used for most level-3 routines such as XGEMM, XSYMM, and XHER2K
|
||||
template <typename T>
|
||||
StatusCode TuneXgemmDirect(cl_command_queue* queue, const size_t m, const size_t n, const size_t k,
|
||||
const double fraction, std::unordered_map<std::string,size_t> ¶meters);
|
||||
|
||||
// Tunes the "Copy" kernel, used for most level-3 routines such as XGEMM, XSYMM, and XHER2K
|
||||
template <typename T>
|
||||
StatusCode TuneCopy(cl_command_queue* queue, const size_t m, const size_t n,
|
||||
const double fraction, std::unordered_map<std::string,size_t> ¶meters);
|
||||
|
||||
// Tunes the "Pad" kernel, used for most level-3 routines such as XGEMM, XSYMM, and XHER2K
|
||||
template <typename T>
|
||||
StatusCode TunePad(cl_command_queue* queue, const size_t m, const size_t n,
|
||||
const double fraction, std::unordered_map<std::string,size_t> ¶meters);
|
||||
|
||||
// Tunes the "Transpose" kernel, used for most level-3 routines such as XGEMM, XSYMM, and XHER2K
|
||||
template <typename T>
|
||||
StatusCode TuneTranspose(cl_command_queue* queue, const size_t m, const size_t n,
|
||||
const double fraction, std::unordered_map<std::string,size_t> ¶meters);
|
||||
|
||||
// Tunes the "Padtranspose" kernel, used for most level-3 routines such as XGEMM, XSYMM, and XHER2K
|
||||
template <typename T>
|
||||
StatusCode TunePadtranspose(cl_command_queue* queue, const size_t m, const size_t n,
|
||||
const double fraction, std::unordered_map<std::string,size_t> ¶meters);
|
||||
|
||||
// Tunes the "Xgemm" kernel, used for the level-3 routine XTRSM
|
||||
template <typename T>
|
||||
StatusCode TuneInvert(cl_command_queue* queue, const size_t m, const size_t n, const size_t k,
|
||||
const double fraction, std::unordered_map<std::string,size_t> ¶meters);
|
||||
|
||||
// =================================================================================================
|
||||
|
||||
} // namespace clblast
|
||||
|
||||
// CLBLAST_CLBLAST_H_
|
||||
#endif
|
||||
1707
include/clblast_c.h
1707
include/clblast_c.h
File diff suppressed because it is too large
Load diff
|
|
@ -1,249 +0,0 @@
|
|||
|
||||
// =================================================================================================
|
||||
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
|
||||
// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
|
||||
// width of 100 characters per line.
|
||||
//
|
||||
// Author(s):
|
||||
// Cedric Nugteren <www.cedricnugteren.nl>
|
||||
//
|
||||
// This file provides simple conversion operations between fp16 (half) and fp32 (float). These
|
||||
// conversion functions are based on ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf and
|
||||
// are also part of the C++ half-precision header (http://half.sourceforge.net/).
|
||||
//
|
||||
// This file is pure C99.
|
||||
//
|
||||
// =================================================================================================
|
||||
|
||||
#ifndef CLBLAST_HALF_H_
|
||||
#define CLBLAST_HALF_H_
|
||||
|
||||
// =================================================================================================
|
||||
|
||||
// The host data-type for half-precision floating-point (16-bit) is based on the `cl_half` OpenCL
|
||||
// type, which is a typedef for unsigned short.
|
||||
typedef unsigned short half;
|
||||
|
||||
// 32-bit union for conversions
|
||||
typedef union ConversionBits_ {
|
||||
unsigned int i32;
|
||||
float f32;
|
||||
} ConversionBits;
|
||||
|
||||
// =================================================================================================
|
||||
|
||||
// Converts a IEEE-compliant single-precision value to half-precision floating-point. This function
|
||||
// applies simple truncation (round toward zero, but with overflows set to infinity) as rounding
|
||||
// mode.
|
||||
static half FloatToHalf(const float value) {
|
||||
static const unsigned short base_table[512] = {
|
||||
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100,
|
||||
0x0200, 0x0400, 0x0800, 0x0C00, 0x1000, 0x1400, 0x1800, 0x1C00, 0x2000, 0x2400, 0x2800, 0x2C00, 0x3000, 0x3400, 0x3800, 0x3C00,
|
||||
0x4000, 0x4400, 0x4800, 0x4C00, 0x5000, 0x5400, 0x5800, 0x5C00, 0x6000, 0x6400, 0x6800, 0x6C00, 0x7000, 0x7400, 0x7800, 0x7C00,
|
||||
0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
|
||||
0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
|
||||
0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
|
||||
0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
|
||||
0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
|
||||
0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
|
||||
0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
|
||||
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
|
||||
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
|
||||
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
|
||||
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
|
||||
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
|
||||
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
|
||||
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001, 0x8002, 0x8004, 0x8008, 0x8010, 0x8020, 0x8040, 0x8080, 0x8100,
|
||||
0x8200, 0x8400, 0x8800, 0x8C00, 0x9000, 0x9400, 0x9800, 0x9C00, 0xA000, 0xA400, 0xA800, 0xAC00, 0xB000, 0xB400, 0xB800, 0xBC00,
|
||||
0xC000, 0xC400, 0xC800, 0xCC00, 0xD000, 0xD400, 0xD800, 0xDC00, 0xE000, 0xE400, 0xE800, 0xEC00, 0xF000, 0xF400, 0xF800, 0xFC00,
|
||||
0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
|
||||
0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
|
||||
0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
|
||||
0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
|
||||
0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
|
||||
0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
|
||||
0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00
|
||||
};
|
||||
static const unsigned char shift_table[512] = {
|
||||
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||
24, 24, 24, 24, 24, 24, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
|
||||
13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 13,
|
||||
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||
24, 24, 24, 24, 24, 24, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
|
||||
13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 13
|
||||
};
|
||||
ConversionBits bits;
|
||||
bits.f32 = value;
|
||||
const unsigned short halfbits = base_table[bits.i32 >> 23] +
|
||||
(unsigned short)((bits.i32 & 0x7FFFFF) >> shift_table[bits.i32 >> 23]);
|
||||
return halfbits;
|
||||
}
|
||||
|
||||
// Converts a half-precision value to IEEE-compliant single-precision floating-point
|
||||
static float HalfToFloat(const half value) {
|
||||
static const unsigned int mantissa_table[2048] = {
|
||||
0x00000000, 0x33800000, 0x34000000, 0x34400000, 0x34800000, 0x34A00000, 0x34C00000, 0x34E00000, 0x35000000, 0x35100000, 0x35200000, 0x35300000, 0x35400000, 0x35500000, 0x35600000, 0x35700000,
|
||||
0x35800000, 0x35880000, 0x35900000, 0x35980000, 0x35A00000, 0x35A80000, 0x35B00000, 0x35B80000, 0x35C00000, 0x35C80000, 0x35D00000, 0x35D80000, 0x35E00000, 0x35E80000, 0x35F00000, 0x35F80000,
|
||||
0x36000000, 0x36040000, 0x36080000, 0x360C0000, 0x36100000, 0x36140000, 0x36180000, 0x361C0000, 0x36200000, 0x36240000, 0x36280000, 0x362C0000, 0x36300000, 0x36340000, 0x36380000, 0x363C0000,
|
||||
0x36400000, 0x36440000, 0x36480000, 0x364C0000, 0x36500000, 0x36540000, 0x36580000, 0x365C0000, 0x36600000, 0x36640000, 0x36680000, 0x366C0000, 0x36700000, 0x36740000, 0x36780000, 0x367C0000,
|
||||
0x36800000, 0x36820000, 0x36840000, 0x36860000, 0x36880000, 0x368A0000, 0x368C0000, 0x368E0000, 0x36900000, 0x36920000, 0x36940000, 0x36960000, 0x36980000, 0x369A0000, 0x369C0000, 0x369E0000,
|
||||
0x36A00000, 0x36A20000, 0x36A40000, 0x36A60000, 0x36A80000, 0x36AA0000, 0x36AC0000, 0x36AE0000, 0x36B00000, 0x36B20000, 0x36B40000, 0x36B60000, 0x36B80000, 0x36BA0000, 0x36BC0000, 0x36BE0000,
|
||||
0x36C00000, 0x36C20000, 0x36C40000, 0x36C60000, 0x36C80000, 0x36CA0000, 0x36CC0000, 0x36CE0000, 0x36D00000, 0x36D20000, 0x36D40000, 0x36D60000, 0x36D80000, 0x36DA0000, 0x36DC0000, 0x36DE0000,
|
||||
0x36E00000, 0x36E20000, 0x36E40000, 0x36E60000, 0x36E80000, 0x36EA0000, 0x36EC0000, 0x36EE0000, 0x36F00000, 0x36F20000, 0x36F40000, 0x36F60000, 0x36F80000, 0x36FA0000, 0x36FC0000, 0x36FE0000,
|
||||
0x37000000, 0x37010000, 0x37020000, 0x37030000, 0x37040000, 0x37050000, 0x37060000, 0x37070000, 0x37080000, 0x37090000, 0x370A0000, 0x370B0000, 0x370C0000, 0x370D0000, 0x370E0000, 0x370F0000,
|
||||
0x37100000, 0x37110000, 0x37120000, 0x37130000, 0x37140000, 0x37150000, 0x37160000, 0x37170000, 0x37180000, 0x37190000, 0x371A0000, 0x371B0000, 0x371C0000, 0x371D0000, 0x371E0000, 0x371F0000,
|
||||
0x37200000, 0x37210000, 0x37220000, 0x37230000, 0x37240000, 0x37250000, 0x37260000, 0x37270000, 0x37280000, 0x37290000, 0x372A0000, 0x372B0000, 0x372C0000, 0x372D0000, 0x372E0000, 0x372F0000,
|
||||
0x37300000, 0x37310000, 0x37320000, 0x37330000, 0x37340000, 0x37350000, 0x37360000, 0x37370000, 0x37380000, 0x37390000, 0x373A0000, 0x373B0000, 0x373C0000, 0x373D0000, 0x373E0000, 0x373F0000,
|
||||
0x37400000, 0x37410000, 0x37420000, 0x37430000, 0x37440000, 0x37450000, 0x37460000, 0x37470000, 0x37480000, 0x37490000, 0x374A0000, 0x374B0000, 0x374C0000, 0x374D0000, 0x374E0000, 0x374F0000,
|
||||
0x37500000, 0x37510000, 0x37520000, 0x37530000, 0x37540000, 0x37550000, 0x37560000, 0x37570000, 0x37580000, 0x37590000, 0x375A0000, 0x375B0000, 0x375C0000, 0x375D0000, 0x375E0000, 0x375F0000,
|
||||
0x37600000, 0x37610000, 0x37620000, 0x37630000, 0x37640000, 0x37650000, 0x37660000, 0x37670000, 0x37680000, 0x37690000, 0x376A0000, 0x376B0000, 0x376C0000, 0x376D0000, 0x376E0000, 0x376F0000,
|
||||
0x37700000, 0x37710000, 0x37720000, 0x37730000, 0x37740000, 0x37750000, 0x37760000, 0x37770000, 0x37780000, 0x37790000, 0x377A0000, 0x377B0000, 0x377C0000, 0x377D0000, 0x377E0000, 0x377F0000,
|
||||
0x37800000, 0x37808000, 0x37810000, 0x37818000, 0x37820000, 0x37828000, 0x37830000, 0x37838000, 0x37840000, 0x37848000, 0x37850000, 0x37858000, 0x37860000, 0x37868000, 0x37870000, 0x37878000,
|
||||
0x37880000, 0x37888000, 0x37890000, 0x37898000, 0x378A0000, 0x378A8000, 0x378B0000, 0x378B8000, 0x378C0000, 0x378C8000, 0x378D0000, 0x378D8000, 0x378E0000, 0x378E8000, 0x378F0000, 0x378F8000,
|
||||
0x37900000, 0x37908000, 0x37910000, 0x37918000, 0x37920000, 0x37928000, 0x37930000, 0x37938000, 0x37940000, 0x37948000, 0x37950000, 0x37958000, 0x37960000, 0x37968000, 0x37970000, 0x37978000,
|
||||
0x37980000, 0x37988000, 0x37990000, 0x37998000, 0x379A0000, 0x379A8000, 0x379B0000, 0x379B8000, 0x379C0000, 0x379C8000, 0x379D0000, 0x379D8000, 0x379E0000, 0x379E8000, 0x379F0000, 0x379F8000,
|
||||
0x37A00000, 0x37A08000, 0x37A10000, 0x37A18000, 0x37A20000, 0x37A28000, 0x37A30000, 0x37A38000, 0x37A40000, 0x37A48000, 0x37A50000, 0x37A58000, 0x37A60000, 0x37A68000, 0x37A70000, 0x37A78000,
|
||||
0x37A80000, 0x37A88000, 0x37A90000, 0x37A98000, 0x37AA0000, 0x37AA8000, 0x37AB0000, 0x37AB8000, 0x37AC0000, 0x37AC8000, 0x37AD0000, 0x37AD8000, 0x37AE0000, 0x37AE8000, 0x37AF0000, 0x37AF8000,
|
||||
0x37B00000, 0x37B08000, 0x37B10000, 0x37B18000, 0x37B20000, 0x37B28000, 0x37B30000, 0x37B38000, 0x37B40000, 0x37B48000, 0x37B50000, 0x37B58000, 0x37B60000, 0x37B68000, 0x37B70000, 0x37B78000,
|
||||
0x37B80000, 0x37B88000, 0x37B90000, 0x37B98000, 0x37BA0000, 0x37BA8000, 0x37BB0000, 0x37BB8000, 0x37BC0000, 0x37BC8000, 0x37BD0000, 0x37BD8000, 0x37BE0000, 0x37BE8000, 0x37BF0000, 0x37BF8000,
|
||||
0x37C00000, 0x37C08000, 0x37C10000, 0x37C18000, 0x37C20000, 0x37C28000, 0x37C30000, 0x37C38000, 0x37C40000, 0x37C48000, 0x37C50000, 0x37C58000, 0x37C60000, 0x37C68000, 0x37C70000, 0x37C78000,
|
||||
0x37C80000, 0x37C88000, 0x37C90000, 0x37C98000, 0x37CA0000, 0x37CA8000, 0x37CB0000, 0x37CB8000, 0x37CC0000, 0x37CC8000, 0x37CD0000, 0x37CD8000, 0x37CE0000, 0x37CE8000, 0x37CF0000, 0x37CF8000,
|
||||
0x37D00000, 0x37D08000, 0x37D10000, 0x37D18000, 0x37D20000, 0x37D28000, 0x37D30000, 0x37D38000, 0x37D40000, 0x37D48000, 0x37D50000, 0x37D58000, 0x37D60000, 0x37D68000, 0x37D70000, 0x37D78000,
|
||||
0x37D80000, 0x37D88000, 0x37D90000, 0x37D98000, 0x37DA0000, 0x37DA8000, 0x37DB0000, 0x37DB8000, 0x37DC0000, 0x37DC8000, 0x37DD0000, 0x37DD8000, 0x37DE0000, 0x37DE8000, 0x37DF0000, 0x37DF8000,
|
||||
0x37E00000, 0x37E08000, 0x37E10000, 0x37E18000, 0x37E20000, 0x37E28000, 0x37E30000, 0x37E38000, 0x37E40000, 0x37E48000, 0x37E50000, 0x37E58000, 0x37E60000, 0x37E68000, 0x37E70000, 0x37E78000,
|
||||
0x37E80000, 0x37E88000, 0x37E90000, 0x37E98000, 0x37EA0000, 0x37EA8000, 0x37EB0000, 0x37EB8000, 0x37EC0000, 0x37EC8000, 0x37ED0000, 0x37ED8000, 0x37EE0000, 0x37EE8000, 0x37EF0000, 0x37EF8000,
|
||||
0x37F00000, 0x37F08000, 0x37F10000, 0x37F18000, 0x37F20000, 0x37F28000, 0x37F30000, 0x37F38000, 0x37F40000, 0x37F48000, 0x37F50000, 0x37F58000, 0x37F60000, 0x37F68000, 0x37F70000, 0x37F78000,
|
||||
0x37F80000, 0x37F88000, 0x37F90000, 0x37F98000, 0x37FA0000, 0x37FA8000, 0x37FB0000, 0x37FB8000, 0x37FC0000, 0x37FC8000, 0x37FD0000, 0x37FD8000, 0x37FE0000, 0x37FE8000, 0x37FF0000, 0x37FF8000,
|
||||
0x38000000, 0x38004000, 0x38008000, 0x3800C000, 0x38010000, 0x38014000, 0x38018000, 0x3801C000, 0x38020000, 0x38024000, 0x38028000, 0x3802C000, 0x38030000, 0x38034000, 0x38038000, 0x3803C000,
|
||||
0x38040000, 0x38044000, 0x38048000, 0x3804C000, 0x38050000, 0x38054000, 0x38058000, 0x3805C000, 0x38060000, 0x38064000, 0x38068000, 0x3806C000, 0x38070000, 0x38074000, 0x38078000, 0x3807C000,
|
||||
0x38080000, 0x38084000, 0x38088000, 0x3808C000, 0x38090000, 0x38094000, 0x38098000, 0x3809C000, 0x380A0000, 0x380A4000, 0x380A8000, 0x380AC000, 0x380B0000, 0x380B4000, 0x380B8000, 0x380BC000,
|
||||
0x380C0000, 0x380C4000, 0x380C8000, 0x380CC000, 0x380D0000, 0x380D4000, 0x380D8000, 0x380DC000, 0x380E0000, 0x380E4000, 0x380E8000, 0x380EC000, 0x380F0000, 0x380F4000, 0x380F8000, 0x380FC000,
|
||||
0x38100000, 0x38104000, 0x38108000, 0x3810C000, 0x38110000, 0x38114000, 0x38118000, 0x3811C000, 0x38120000, 0x38124000, 0x38128000, 0x3812C000, 0x38130000, 0x38134000, 0x38138000, 0x3813C000,
|
||||
0x38140000, 0x38144000, 0x38148000, 0x3814C000, 0x38150000, 0x38154000, 0x38158000, 0x3815C000, 0x38160000, 0x38164000, 0x38168000, 0x3816C000, 0x38170000, 0x38174000, 0x38178000, 0x3817C000,
|
||||
0x38180000, 0x38184000, 0x38188000, 0x3818C000, 0x38190000, 0x38194000, 0x38198000, 0x3819C000, 0x381A0000, 0x381A4000, 0x381A8000, 0x381AC000, 0x381B0000, 0x381B4000, 0x381B8000, 0x381BC000,
|
||||
0x381C0000, 0x381C4000, 0x381C8000, 0x381CC000, 0x381D0000, 0x381D4000, 0x381D8000, 0x381DC000, 0x381E0000, 0x381E4000, 0x381E8000, 0x381EC000, 0x381F0000, 0x381F4000, 0x381F8000, 0x381FC000,
|
||||
0x38200000, 0x38204000, 0x38208000, 0x3820C000, 0x38210000, 0x38214000, 0x38218000, 0x3821C000, 0x38220000, 0x38224000, 0x38228000, 0x3822C000, 0x38230000, 0x38234000, 0x38238000, 0x3823C000,
|
||||
0x38240000, 0x38244000, 0x38248000, 0x3824C000, 0x38250000, 0x38254000, 0x38258000, 0x3825C000, 0x38260000, 0x38264000, 0x38268000, 0x3826C000, 0x38270000, 0x38274000, 0x38278000, 0x3827C000,
|
||||
0x38280000, 0x38284000, 0x38288000, 0x3828C000, 0x38290000, 0x38294000, 0x38298000, 0x3829C000, 0x382A0000, 0x382A4000, 0x382A8000, 0x382AC000, 0x382B0000, 0x382B4000, 0x382B8000, 0x382BC000,
|
||||
0x382C0000, 0x382C4000, 0x382C8000, 0x382CC000, 0x382D0000, 0x382D4000, 0x382D8000, 0x382DC000, 0x382E0000, 0x382E4000, 0x382E8000, 0x382EC000, 0x382F0000, 0x382F4000, 0x382F8000, 0x382FC000,
|
||||
0x38300000, 0x38304000, 0x38308000, 0x3830C000, 0x38310000, 0x38314000, 0x38318000, 0x3831C000, 0x38320000, 0x38324000, 0x38328000, 0x3832C000, 0x38330000, 0x38334000, 0x38338000, 0x3833C000,
|
||||
0x38340000, 0x38344000, 0x38348000, 0x3834C000, 0x38350000, 0x38354000, 0x38358000, 0x3835C000, 0x38360000, 0x38364000, 0x38368000, 0x3836C000, 0x38370000, 0x38374000, 0x38378000, 0x3837C000,
|
||||
0x38380000, 0x38384000, 0x38388000, 0x3838C000, 0x38390000, 0x38394000, 0x38398000, 0x3839C000, 0x383A0000, 0x383A4000, 0x383A8000, 0x383AC000, 0x383B0000, 0x383B4000, 0x383B8000, 0x383BC000,
|
||||
0x383C0000, 0x383C4000, 0x383C8000, 0x383CC000, 0x383D0000, 0x383D4000, 0x383D8000, 0x383DC000, 0x383E0000, 0x383E4000, 0x383E8000, 0x383EC000, 0x383F0000, 0x383F4000, 0x383F8000, 0x383FC000,
|
||||
0x38400000, 0x38404000, 0x38408000, 0x3840C000, 0x38410000, 0x38414000, 0x38418000, 0x3841C000, 0x38420000, 0x38424000, 0x38428000, 0x3842C000, 0x38430000, 0x38434000, 0x38438000, 0x3843C000,
|
||||
0x38440000, 0x38444000, 0x38448000, 0x3844C000, 0x38450000, 0x38454000, 0x38458000, 0x3845C000, 0x38460000, 0x38464000, 0x38468000, 0x3846C000, 0x38470000, 0x38474000, 0x38478000, 0x3847C000,
|
||||
0x38480000, 0x38484000, 0x38488000, 0x3848C000, 0x38490000, 0x38494000, 0x38498000, 0x3849C000, 0x384A0000, 0x384A4000, 0x384A8000, 0x384AC000, 0x384B0000, 0x384B4000, 0x384B8000, 0x384BC000,
|
||||
0x384C0000, 0x384C4000, 0x384C8000, 0x384CC000, 0x384D0000, 0x384D4000, 0x384D8000, 0x384DC000, 0x384E0000, 0x384E4000, 0x384E8000, 0x384EC000, 0x384F0000, 0x384F4000, 0x384F8000, 0x384FC000,
|
||||
0x38500000, 0x38504000, 0x38508000, 0x3850C000, 0x38510000, 0x38514000, 0x38518000, 0x3851C000, 0x38520000, 0x38524000, 0x38528000, 0x3852C000, 0x38530000, 0x38534000, 0x38538000, 0x3853C000,
|
||||
0x38540000, 0x38544000, 0x38548000, 0x3854C000, 0x38550000, 0x38554000, 0x38558000, 0x3855C000, 0x38560000, 0x38564000, 0x38568000, 0x3856C000, 0x38570000, 0x38574000, 0x38578000, 0x3857C000,
|
||||
0x38580000, 0x38584000, 0x38588000, 0x3858C000, 0x38590000, 0x38594000, 0x38598000, 0x3859C000, 0x385A0000, 0x385A4000, 0x385A8000, 0x385AC000, 0x385B0000, 0x385B4000, 0x385B8000, 0x385BC000,
|
||||
0x385C0000, 0x385C4000, 0x385C8000, 0x385CC000, 0x385D0000, 0x385D4000, 0x385D8000, 0x385DC000, 0x385E0000, 0x385E4000, 0x385E8000, 0x385EC000, 0x385F0000, 0x385F4000, 0x385F8000, 0x385FC000,
|
||||
0x38600000, 0x38604000, 0x38608000, 0x3860C000, 0x38610000, 0x38614000, 0x38618000, 0x3861C000, 0x38620000, 0x38624000, 0x38628000, 0x3862C000, 0x38630000, 0x38634000, 0x38638000, 0x3863C000,
|
||||
0x38640000, 0x38644000, 0x38648000, 0x3864C000, 0x38650000, 0x38654000, 0x38658000, 0x3865C000, 0x38660000, 0x38664000, 0x38668000, 0x3866C000, 0x38670000, 0x38674000, 0x38678000, 0x3867C000,
|
||||
0x38680000, 0x38684000, 0x38688000, 0x3868C000, 0x38690000, 0x38694000, 0x38698000, 0x3869C000, 0x386A0000, 0x386A4000, 0x386A8000, 0x386AC000, 0x386B0000, 0x386B4000, 0x386B8000, 0x386BC000,
|
||||
0x386C0000, 0x386C4000, 0x386C8000, 0x386CC000, 0x386D0000, 0x386D4000, 0x386D8000, 0x386DC000, 0x386E0000, 0x386E4000, 0x386E8000, 0x386EC000, 0x386F0000, 0x386F4000, 0x386F8000, 0x386FC000,
|
||||
0x38700000, 0x38704000, 0x38708000, 0x3870C000, 0x38710000, 0x38714000, 0x38718000, 0x3871C000, 0x38720000, 0x38724000, 0x38728000, 0x3872C000, 0x38730000, 0x38734000, 0x38738000, 0x3873C000,
|
||||
0x38740000, 0x38744000, 0x38748000, 0x3874C000, 0x38750000, 0x38754000, 0x38758000, 0x3875C000, 0x38760000, 0x38764000, 0x38768000, 0x3876C000, 0x38770000, 0x38774000, 0x38778000, 0x3877C000,
|
||||
0x38780000, 0x38784000, 0x38788000, 0x3878C000, 0x38790000, 0x38794000, 0x38798000, 0x3879C000, 0x387A0000, 0x387A4000, 0x387A8000, 0x387AC000, 0x387B0000, 0x387B4000, 0x387B8000, 0x387BC000,
|
||||
0x387C0000, 0x387C4000, 0x387C8000, 0x387CC000, 0x387D0000, 0x387D4000, 0x387D8000, 0x387DC000, 0x387E0000, 0x387E4000, 0x387E8000, 0x387EC000, 0x387F0000, 0x387F4000, 0x387F8000, 0x387FC000,
|
||||
0x38000000, 0x38002000, 0x38004000, 0x38006000, 0x38008000, 0x3800A000, 0x3800C000, 0x3800E000, 0x38010000, 0x38012000, 0x38014000, 0x38016000, 0x38018000, 0x3801A000, 0x3801C000, 0x3801E000,
|
||||
0x38020000, 0x38022000, 0x38024000, 0x38026000, 0x38028000, 0x3802A000, 0x3802C000, 0x3802E000, 0x38030000, 0x38032000, 0x38034000, 0x38036000, 0x38038000, 0x3803A000, 0x3803C000, 0x3803E000,
|
||||
0x38040000, 0x38042000, 0x38044000, 0x38046000, 0x38048000, 0x3804A000, 0x3804C000, 0x3804E000, 0x38050000, 0x38052000, 0x38054000, 0x38056000, 0x38058000, 0x3805A000, 0x3805C000, 0x3805E000,
|
||||
0x38060000, 0x38062000, 0x38064000, 0x38066000, 0x38068000, 0x3806A000, 0x3806C000, 0x3806E000, 0x38070000, 0x38072000, 0x38074000, 0x38076000, 0x38078000, 0x3807A000, 0x3807C000, 0x3807E000,
|
||||
0x38080000, 0x38082000, 0x38084000, 0x38086000, 0x38088000, 0x3808A000, 0x3808C000, 0x3808E000, 0x38090000, 0x38092000, 0x38094000, 0x38096000, 0x38098000, 0x3809A000, 0x3809C000, 0x3809E000,
|
||||
0x380A0000, 0x380A2000, 0x380A4000, 0x380A6000, 0x380A8000, 0x380AA000, 0x380AC000, 0x380AE000, 0x380B0000, 0x380B2000, 0x380B4000, 0x380B6000, 0x380B8000, 0x380BA000, 0x380BC000, 0x380BE000,
|
||||
0x380C0000, 0x380C2000, 0x380C4000, 0x380C6000, 0x380C8000, 0x380CA000, 0x380CC000, 0x380CE000, 0x380D0000, 0x380D2000, 0x380D4000, 0x380D6000, 0x380D8000, 0x380DA000, 0x380DC000, 0x380DE000,
|
||||
0x380E0000, 0x380E2000, 0x380E4000, 0x380E6000, 0x380E8000, 0x380EA000, 0x380EC000, 0x380EE000, 0x380F0000, 0x380F2000, 0x380F4000, 0x380F6000, 0x380F8000, 0x380FA000, 0x380FC000, 0x380FE000,
|
||||
0x38100000, 0x38102000, 0x38104000, 0x38106000, 0x38108000, 0x3810A000, 0x3810C000, 0x3810E000, 0x38110000, 0x38112000, 0x38114000, 0x38116000, 0x38118000, 0x3811A000, 0x3811C000, 0x3811E000,
|
||||
0x38120000, 0x38122000, 0x38124000, 0x38126000, 0x38128000, 0x3812A000, 0x3812C000, 0x3812E000, 0x38130000, 0x38132000, 0x38134000, 0x38136000, 0x38138000, 0x3813A000, 0x3813C000, 0x3813E000,
|
||||
0x38140000, 0x38142000, 0x38144000, 0x38146000, 0x38148000, 0x3814A000, 0x3814C000, 0x3814E000, 0x38150000, 0x38152000, 0x38154000, 0x38156000, 0x38158000, 0x3815A000, 0x3815C000, 0x3815E000,
|
||||
0x38160000, 0x38162000, 0x38164000, 0x38166000, 0x38168000, 0x3816A000, 0x3816C000, 0x3816E000, 0x38170000, 0x38172000, 0x38174000, 0x38176000, 0x38178000, 0x3817A000, 0x3817C000, 0x3817E000,
|
||||
0x38180000, 0x38182000, 0x38184000, 0x38186000, 0x38188000, 0x3818A000, 0x3818C000, 0x3818E000, 0x38190000, 0x38192000, 0x38194000, 0x38196000, 0x38198000, 0x3819A000, 0x3819C000, 0x3819E000,
|
||||
0x381A0000, 0x381A2000, 0x381A4000, 0x381A6000, 0x381A8000, 0x381AA000, 0x381AC000, 0x381AE000, 0x381B0000, 0x381B2000, 0x381B4000, 0x381B6000, 0x381B8000, 0x381BA000, 0x381BC000, 0x381BE000,
|
||||
0x381C0000, 0x381C2000, 0x381C4000, 0x381C6000, 0x381C8000, 0x381CA000, 0x381CC000, 0x381CE000, 0x381D0000, 0x381D2000, 0x381D4000, 0x381D6000, 0x381D8000, 0x381DA000, 0x381DC000, 0x381DE000,
|
||||
0x381E0000, 0x381E2000, 0x381E4000, 0x381E6000, 0x381E8000, 0x381EA000, 0x381EC000, 0x381EE000, 0x381F0000, 0x381F2000, 0x381F4000, 0x381F6000, 0x381F8000, 0x381FA000, 0x381FC000, 0x381FE000,
|
||||
0x38200000, 0x38202000, 0x38204000, 0x38206000, 0x38208000, 0x3820A000, 0x3820C000, 0x3820E000, 0x38210000, 0x38212000, 0x38214000, 0x38216000, 0x38218000, 0x3821A000, 0x3821C000, 0x3821E000,
|
||||
0x38220000, 0x38222000, 0x38224000, 0x38226000, 0x38228000, 0x3822A000, 0x3822C000, 0x3822E000, 0x38230000, 0x38232000, 0x38234000, 0x38236000, 0x38238000, 0x3823A000, 0x3823C000, 0x3823E000,
|
||||
0x38240000, 0x38242000, 0x38244000, 0x38246000, 0x38248000, 0x3824A000, 0x3824C000, 0x3824E000, 0x38250000, 0x38252000, 0x38254000, 0x38256000, 0x38258000, 0x3825A000, 0x3825C000, 0x3825E000,
|
||||
0x38260000, 0x38262000, 0x38264000, 0x38266000, 0x38268000, 0x3826A000, 0x3826C000, 0x3826E000, 0x38270000, 0x38272000, 0x38274000, 0x38276000, 0x38278000, 0x3827A000, 0x3827C000, 0x3827E000,
|
||||
0x38280000, 0x38282000, 0x38284000, 0x38286000, 0x38288000, 0x3828A000, 0x3828C000, 0x3828E000, 0x38290000, 0x38292000, 0x38294000, 0x38296000, 0x38298000, 0x3829A000, 0x3829C000, 0x3829E000,
|
||||
0x382A0000, 0x382A2000, 0x382A4000, 0x382A6000, 0x382A8000, 0x382AA000, 0x382AC000, 0x382AE000, 0x382B0000, 0x382B2000, 0x382B4000, 0x382B6000, 0x382B8000, 0x382BA000, 0x382BC000, 0x382BE000,
|
||||
0x382C0000, 0x382C2000, 0x382C4000, 0x382C6000, 0x382C8000, 0x382CA000, 0x382CC000, 0x382CE000, 0x382D0000, 0x382D2000, 0x382D4000, 0x382D6000, 0x382D8000, 0x382DA000, 0x382DC000, 0x382DE000,
|
||||
0x382E0000, 0x382E2000, 0x382E4000, 0x382E6000, 0x382E8000, 0x382EA000, 0x382EC000, 0x382EE000, 0x382F0000, 0x382F2000, 0x382F4000, 0x382F6000, 0x382F8000, 0x382FA000, 0x382FC000, 0x382FE000,
|
||||
0x38300000, 0x38302000, 0x38304000, 0x38306000, 0x38308000, 0x3830A000, 0x3830C000, 0x3830E000, 0x38310000, 0x38312000, 0x38314000, 0x38316000, 0x38318000, 0x3831A000, 0x3831C000, 0x3831E000,
|
||||
0x38320000, 0x38322000, 0x38324000, 0x38326000, 0x38328000, 0x3832A000, 0x3832C000, 0x3832E000, 0x38330000, 0x38332000, 0x38334000, 0x38336000, 0x38338000, 0x3833A000, 0x3833C000, 0x3833E000,
|
||||
0x38340000, 0x38342000, 0x38344000, 0x38346000, 0x38348000, 0x3834A000, 0x3834C000, 0x3834E000, 0x38350000, 0x38352000, 0x38354000, 0x38356000, 0x38358000, 0x3835A000, 0x3835C000, 0x3835E000,
|
||||
0x38360000, 0x38362000, 0x38364000, 0x38366000, 0x38368000, 0x3836A000, 0x3836C000, 0x3836E000, 0x38370000, 0x38372000, 0x38374000, 0x38376000, 0x38378000, 0x3837A000, 0x3837C000, 0x3837E000,
|
||||
0x38380000, 0x38382000, 0x38384000, 0x38386000, 0x38388000, 0x3838A000, 0x3838C000, 0x3838E000, 0x38390000, 0x38392000, 0x38394000, 0x38396000, 0x38398000, 0x3839A000, 0x3839C000, 0x3839E000,
|
||||
0x383A0000, 0x383A2000, 0x383A4000, 0x383A6000, 0x383A8000, 0x383AA000, 0x383AC000, 0x383AE000, 0x383B0000, 0x383B2000, 0x383B4000, 0x383B6000, 0x383B8000, 0x383BA000, 0x383BC000, 0x383BE000,
|
||||
0x383C0000, 0x383C2000, 0x383C4000, 0x383C6000, 0x383C8000, 0x383CA000, 0x383CC000, 0x383CE000, 0x383D0000, 0x383D2000, 0x383D4000, 0x383D6000, 0x383D8000, 0x383DA000, 0x383DC000, 0x383DE000,
|
||||
0x383E0000, 0x383E2000, 0x383E4000, 0x383E6000, 0x383E8000, 0x383EA000, 0x383EC000, 0x383EE000, 0x383F0000, 0x383F2000, 0x383F4000, 0x383F6000, 0x383F8000, 0x383FA000, 0x383FC000, 0x383FE000,
|
||||
0x38400000, 0x38402000, 0x38404000, 0x38406000, 0x38408000, 0x3840A000, 0x3840C000, 0x3840E000, 0x38410000, 0x38412000, 0x38414000, 0x38416000, 0x38418000, 0x3841A000, 0x3841C000, 0x3841E000,
|
||||
0x38420000, 0x38422000, 0x38424000, 0x38426000, 0x38428000, 0x3842A000, 0x3842C000, 0x3842E000, 0x38430000, 0x38432000, 0x38434000, 0x38436000, 0x38438000, 0x3843A000, 0x3843C000, 0x3843E000,
|
||||
0x38440000, 0x38442000, 0x38444000, 0x38446000, 0x38448000, 0x3844A000, 0x3844C000, 0x3844E000, 0x38450000, 0x38452000, 0x38454000, 0x38456000, 0x38458000, 0x3845A000, 0x3845C000, 0x3845E000,
|
||||
0x38460000, 0x38462000, 0x38464000, 0x38466000, 0x38468000, 0x3846A000, 0x3846C000, 0x3846E000, 0x38470000, 0x38472000, 0x38474000, 0x38476000, 0x38478000, 0x3847A000, 0x3847C000, 0x3847E000,
|
||||
0x38480000, 0x38482000, 0x38484000, 0x38486000, 0x38488000, 0x3848A000, 0x3848C000, 0x3848E000, 0x38490000, 0x38492000, 0x38494000, 0x38496000, 0x38498000, 0x3849A000, 0x3849C000, 0x3849E000,
|
||||
0x384A0000, 0x384A2000, 0x384A4000, 0x384A6000, 0x384A8000, 0x384AA000, 0x384AC000, 0x384AE000, 0x384B0000, 0x384B2000, 0x384B4000, 0x384B6000, 0x384B8000, 0x384BA000, 0x384BC000, 0x384BE000,
|
||||
0x384C0000, 0x384C2000, 0x384C4000, 0x384C6000, 0x384C8000, 0x384CA000, 0x384CC000, 0x384CE000, 0x384D0000, 0x384D2000, 0x384D4000, 0x384D6000, 0x384D8000, 0x384DA000, 0x384DC000, 0x384DE000,
|
||||
0x384E0000, 0x384E2000, 0x384E4000, 0x384E6000, 0x384E8000, 0x384EA000, 0x384EC000, 0x384EE000, 0x384F0000, 0x384F2000, 0x384F4000, 0x384F6000, 0x384F8000, 0x384FA000, 0x384FC000, 0x384FE000,
|
||||
0x38500000, 0x38502000, 0x38504000, 0x38506000, 0x38508000, 0x3850A000, 0x3850C000, 0x3850E000, 0x38510000, 0x38512000, 0x38514000, 0x38516000, 0x38518000, 0x3851A000, 0x3851C000, 0x3851E000,
|
||||
0x38520000, 0x38522000, 0x38524000, 0x38526000, 0x38528000, 0x3852A000, 0x3852C000, 0x3852E000, 0x38530000, 0x38532000, 0x38534000, 0x38536000, 0x38538000, 0x3853A000, 0x3853C000, 0x3853E000,
|
||||
0x38540000, 0x38542000, 0x38544000, 0x38546000, 0x38548000, 0x3854A000, 0x3854C000, 0x3854E000, 0x38550000, 0x38552000, 0x38554000, 0x38556000, 0x38558000, 0x3855A000, 0x3855C000, 0x3855E000,
|
||||
0x38560000, 0x38562000, 0x38564000, 0x38566000, 0x38568000, 0x3856A000, 0x3856C000, 0x3856E000, 0x38570000, 0x38572000, 0x38574000, 0x38576000, 0x38578000, 0x3857A000, 0x3857C000, 0x3857E000,
|
||||
0x38580000, 0x38582000, 0x38584000, 0x38586000, 0x38588000, 0x3858A000, 0x3858C000, 0x3858E000, 0x38590000, 0x38592000, 0x38594000, 0x38596000, 0x38598000, 0x3859A000, 0x3859C000, 0x3859E000,
|
||||
0x385A0000, 0x385A2000, 0x385A4000, 0x385A6000, 0x385A8000, 0x385AA000, 0x385AC000, 0x385AE000, 0x385B0000, 0x385B2000, 0x385B4000, 0x385B6000, 0x385B8000, 0x385BA000, 0x385BC000, 0x385BE000,
|
||||
0x385C0000, 0x385C2000, 0x385C4000, 0x385C6000, 0x385C8000, 0x385CA000, 0x385CC000, 0x385CE000, 0x385D0000, 0x385D2000, 0x385D4000, 0x385D6000, 0x385D8000, 0x385DA000, 0x385DC000, 0x385DE000,
|
||||
0x385E0000, 0x385E2000, 0x385E4000, 0x385E6000, 0x385E8000, 0x385EA000, 0x385EC000, 0x385EE000, 0x385F0000, 0x385F2000, 0x385F4000, 0x385F6000, 0x385F8000, 0x385FA000, 0x385FC000, 0x385FE000,
|
||||
0x38600000, 0x38602000, 0x38604000, 0x38606000, 0x38608000, 0x3860A000, 0x3860C000, 0x3860E000, 0x38610000, 0x38612000, 0x38614000, 0x38616000, 0x38618000, 0x3861A000, 0x3861C000, 0x3861E000,
|
||||
0x38620000, 0x38622000, 0x38624000, 0x38626000, 0x38628000, 0x3862A000, 0x3862C000, 0x3862E000, 0x38630000, 0x38632000, 0x38634000, 0x38636000, 0x38638000, 0x3863A000, 0x3863C000, 0x3863E000,
|
||||
0x38640000, 0x38642000, 0x38644000, 0x38646000, 0x38648000, 0x3864A000, 0x3864C000, 0x3864E000, 0x38650000, 0x38652000, 0x38654000, 0x38656000, 0x38658000, 0x3865A000, 0x3865C000, 0x3865E000,
|
||||
0x38660000, 0x38662000, 0x38664000, 0x38666000, 0x38668000, 0x3866A000, 0x3866C000, 0x3866E000, 0x38670000, 0x38672000, 0x38674000, 0x38676000, 0x38678000, 0x3867A000, 0x3867C000, 0x3867E000,
|
||||
0x38680000, 0x38682000, 0x38684000, 0x38686000, 0x38688000, 0x3868A000, 0x3868C000, 0x3868E000, 0x38690000, 0x38692000, 0x38694000, 0x38696000, 0x38698000, 0x3869A000, 0x3869C000, 0x3869E000,
|
||||
0x386A0000, 0x386A2000, 0x386A4000, 0x386A6000, 0x386A8000, 0x386AA000, 0x386AC000, 0x386AE000, 0x386B0000, 0x386B2000, 0x386B4000, 0x386B6000, 0x386B8000, 0x386BA000, 0x386BC000, 0x386BE000,
|
||||
0x386C0000, 0x386C2000, 0x386C4000, 0x386C6000, 0x386C8000, 0x386CA000, 0x386CC000, 0x386CE000, 0x386D0000, 0x386D2000, 0x386D4000, 0x386D6000, 0x386D8000, 0x386DA000, 0x386DC000, 0x386DE000,
|
||||
0x386E0000, 0x386E2000, 0x386E4000, 0x386E6000, 0x386E8000, 0x386EA000, 0x386EC000, 0x386EE000, 0x386F0000, 0x386F2000, 0x386F4000, 0x386F6000, 0x386F8000, 0x386FA000, 0x386FC000, 0x386FE000,
|
||||
0x38700000, 0x38702000, 0x38704000, 0x38706000, 0x38708000, 0x3870A000, 0x3870C000, 0x3870E000, 0x38710000, 0x38712000, 0x38714000, 0x38716000, 0x38718000, 0x3871A000, 0x3871C000, 0x3871E000,
|
||||
0x38720000, 0x38722000, 0x38724000, 0x38726000, 0x38728000, 0x3872A000, 0x3872C000, 0x3872E000, 0x38730000, 0x38732000, 0x38734000, 0x38736000, 0x38738000, 0x3873A000, 0x3873C000, 0x3873E000,
|
||||
0x38740000, 0x38742000, 0x38744000, 0x38746000, 0x38748000, 0x3874A000, 0x3874C000, 0x3874E000, 0x38750000, 0x38752000, 0x38754000, 0x38756000, 0x38758000, 0x3875A000, 0x3875C000, 0x3875E000,
|
||||
0x38760000, 0x38762000, 0x38764000, 0x38766000, 0x38768000, 0x3876A000, 0x3876C000, 0x3876E000, 0x38770000, 0x38772000, 0x38774000, 0x38776000, 0x38778000, 0x3877A000, 0x3877C000, 0x3877E000,
|
||||
0x38780000, 0x38782000, 0x38784000, 0x38786000, 0x38788000, 0x3878A000, 0x3878C000, 0x3878E000, 0x38790000, 0x38792000, 0x38794000, 0x38796000, 0x38798000, 0x3879A000, 0x3879C000, 0x3879E000,
|
||||
0x387A0000, 0x387A2000, 0x387A4000, 0x387A6000, 0x387A8000, 0x387AA000, 0x387AC000, 0x387AE000, 0x387B0000, 0x387B2000, 0x387B4000, 0x387B6000, 0x387B8000, 0x387BA000, 0x387BC000, 0x387BE000,
|
||||
0x387C0000, 0x387C2000, 0x387C4000, 0x387C6000, 0x387C8000, 0x387CA000, 0x387CC000, 0x387CE000, 0x387D0000, 0x387D2000, 0x387D4000, 0x387D6000, 0x387D8000, 0x387DA000, 0x387DC000, 0x387DE000,
|
||||
0x387E0000, 0x387E2000, 0x387E4000, 0x387E6000, 0x387E8000, 0x387EA000, 0x387EC000, 0x387EE000, 0x387F0000, 0x387F2000, 0x387F4000, 0x387F6000, 0x387F8000, 0x387FA000, 0x387FC000, 0x387FE000
|
||||
};
|
||||
static const unsigned int exponent_table[64] = {
|
||||
0x00000000, 0x00800000, 0x01000000, 0x01800000, 0x02000000, 0x02800000, 0x03000000, 0x03800000, 0x04000000, 0x04800000, 0x05000000, 0x05800000, 0x06000000, 0x06800000, 0x07000000, 0x07800000,
|
||||
0x08000000, 0x08800000, 0x09000000, 0x09800000, 0x0A000000, 0x0A800000, 0x0B000000, 0x0B800000, 0x0C000000, 0x0C800000, 0x0D000000, 0x0D800000, 0x0E000000, 0x0E800000, 0x0F000000, 0x47800000,
|
||||
0x80000000, 0x80800000, 0x81000000, 0x81800000, 0x82000000, 0x82800000, 0x83000000, 0x83800000, 0x84000000, 0x84800000, 0x85000000, 0x85800000, 0x86000000, 0x86800000, 0x87000000, 0x87800000,
|
||||
0x88000000, 0x88800000, 0x89000000, 0x89800000, 0x8A000000, 0x8A800000, 0x8B000000, 0x8B800000, 0x8C000000, 0x8C800000, 0x8D000000, 0x8D800000, 0x8E000000, 0x8E800000, 0x8F000000, 0xC7800000
|
||||
};
|
||||
static const unsigned short offset_table[64] = {
|
||||
0, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024,
|
||||
0, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024
|
||||
};
|
||||
ConversionBits bits;
|
||||
bits.i32 = mantissa_table[offset_table[value >> 10] + (value & 0x3FF)] +
|
||||
exponent_table[value >> 10];
|
||||
return bits.f32;
|
||||
}
|
||||
|
||||
// =================================================================================================
|
||||
|
||||
// CLBLAST_HALF_H_
|
||||
#endif
|
||||
|
|
@ -1,993 +0,0 @@
|
|||
|
||||
// =================================================================================================
|
||||
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
|
||||
// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
|
||||
// width of 100 characters per line.
|
||||
//
|
||||
// Author(s):
|
||||
// Cedric Nugteren <www.cedricnugteren.nl>
|
||||
//
|
||||
// This file contains the Netlib CBLAS interface to the CLBlast BLAS routines, performing all buffer
|
||||
// copies automatically and running on the default OpenCL platform and device. For full control over
|
||||
// performance, it is advised to use the regular clblast.h or clblast_c.h headers instead.
|
||||
//
|
||||
// =================================================================================================
|
||||
|
||||
#ifndef CLBLAST_CLBLAST_NETLIB_C_H_
|
||||
#define CLBLAST_CLBLAST_NETLIB_C_H_
|
||||
|
||||
// Exports library functions under Windows when building a DLL. See also:
|
||||
// https://msdn.microsoft.com/en-us/library/a90k134d.aspx
|
||||
#if defined(_WIN32) && defined(CLBLAST_DLL)
|
||||
#if defined(COMPILING_DLL)
|
||||
#define PUBLIC_API __declspec(dllexport)
|
||||
#else
|
||||
#define PUBLIC_API __declspec(dllimport)
|
||||
#endif
|
||||
#else
|
||||
#define PUBLIC_API
|
||||
#endif
|
||||
|
||||
// The C interface
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// =================================================================================================
|
||||
|
||||
// Matrix layout and transpose types
|
||||
typedef enum CLBlastLayout_ { CLBlastLayoutRowMajor = 101,
|
||||
CLBlastLayoutColMajor = 102 } CLBlastLayout;
|
||||
typedef enum CLBlastTranspose_ { CLBlastTransposeNo = 111, CLBlastTransposeYes = 112,
|
||||
CLBlastTransposeConjugate = 113 } CLBlastTranspose;
|
||||
typedef enum CLBlastTriangle_ { CLBlastTriangleUpper = 121,
|
||||
CLBlastTriangleLower = 122 } CLBlastTriangle;
|
||||
typedef enum CLBlastDiagonal_ { CLBlastDiagonalNonUnit = 131,
|
||||
CLBlastDiagonalUnit = 132 } CLBlastDiagonal;
|
||||
typedef enum CLBlastSide_ { CLBlastSideLeft = 141, CLBlastSideRight = 142 } CLBlastSide;
|
||||
typedef enum CLBlastKernelMode_ { CLBlastKernelModeCrossCorrelation = 141, CLBlastKernelModeConvolution = 152 } CLBlastKernelMode;
|
||||
|
||||
// For full compatibility with CBLAS
|
||||
typedef CLBlastLayout CBLAS_ORDER;
|
||||
typedef CLBlastTranspose CBLAS_TRANSPOSE;
|
||||
typedef CLBlastTriangle CBLAS_UPLO;
|
||||
typedef CLBlastDiagonal CBLAS_DIAG;
|
||||
typedef CLBlastSide CBLAS_SIDE;
|
||||
#define CblasRowMajor CLBlastLayoutRowMajor
|
||||
#define CblasColMajor CLBlastLayoutColMajor
|
||||
#define CblasNoTrans CLBlastTransposeNo
|
||||
#define CblasTrans CLBlastTransposeYes
|
||||
#define CblasConjTrans CLBlastTransposeConjugate
|
||||
#define CblasUpper CLBlastTriangleUpper
|
||||
#define CblasLower CLBlastTriangleLower
|
||||
#define CblasNonUnit CLBlastDiagonalNonUnit
|
||||
#define CblasUnit CLBlastDiagonalUnit
|
||||
#define CblasLeft CLBlastSideLeft
|
||||
#define CblasRight CLBlastSideRight
|
||||
|
||||
// =================================================================================================
|
||||
// BLAS level-1 (vector-vector) routines
|
||||
// =================================================================================================
|
||||
|
||||
// Generate givens plane rotation: SROTG/DROTG
|
||||
void PUBLIC_API cblas_srotg(float* sa,
|
||||
float* sb,
|
||||
float* sc,
|
||||
float* ss);
|
||||
void PUBLIC_API cblas_drotg(double* sa,
|
||||
double* sb,
|
||||
double* sc,
|
||||
double* ss);
|
||||
|
||||
// Generate modified givens plane rotation: SROTMG/DROTMG
|
||||
void PUBLIC_API cblas_srotmg(float* sd1,
|
||||
float* sd2,
|
||||
float* sx1,
|
||||
const float sy1,
|
||||
float* sparam);
|
||||
void PUBLIC_API cblas_drotmg(double* sd1,
|
||||
double* sd2,
|
||||
double* sx1,
|
||||
const double sy1,
|
||||
double* sparam);
|
||||
|
||||
// Apply givens plane rotation: SROT/DROT
|
||||
void PUBLIC_API cblas_srot(const int n,
|
||||
float* x, const int x_inc,
|
||||
float* y, const int y_inc,
|
||||
const float cos,
|
||||
const float sin);
|
||||
void PUBLIC_API cblas_drot(const int n,
|
||||
double* x, const int x_inc,
|
||||
double* y, const int y_inc,
|
||||
const double cos,
|
||||
const double sin);
|
||||
|
||||
// Apply modified givens plane rotation: SROTM/DROTM
|
||||
void PUBLIC_API cblas_srotm(const int n,
|
||||
float* x, const int x_inc,
|
||||
float* y, const int y_inc,
|
||||
float* sparam);
|
||||
void PUBLIC_API cblas_drotm(const int n,
|
||||
double* x, const int x_inc,
|
||||
double* y, const int y_inc,
|
||||
double* sparam);
|
||||
|
||||
// Swap two vectors: SSWAP/DSWAP/CSWAP/ZSWAP/HSWAP
|
||||
void PUBLIC_API cblas_sswap(const int n,
|
||||
float* x, const int x_inc,
|
||||
float* y, const int y_inc);
|
||||
void PUBLIC_API cblas_dswap(const int n,
|
||||
double* x, const int x_inc,
|
||||
double* y, const int y_inc);
|
||||
void PUBLIC_API cblas_cswap(const int n,
|
||||
void* x, const int x_inc,
|
||||
void* y, const int y_inc);
|
||||
void PUBLIC_API cblas_zswap(const int n,
|
||||
void* x, const int x_inc,
|
||||
void* y, const int y_inc);
|
||||
|
||||
// Vector scaling: SSCAL/DSCAL/CSCAL/ZSCAL/HSCAL
|
||||
void PUBLIC_API cblas_sscal(const int n,
|
||||
const float alpha,
|
||||
float* x, const int x_inc);
|
||||
void PUBLIC_API cblas_dscal(const int n,
|
||||
const double alpha,
|
||||
double* x, const int x_inc);
|
||||
void PUBLIC_API cblas_cscal(const int n,
|
||||
const void* alpha,
|
||||
void* x, const int x_inc);
|
||||
void PUBLIC_API cblas_zscal(const int n,
|
||||
const void* alpha,
|
||||
void* x, const int x_inc);
|
||||
|
||||
// Vector copy: SCOPY/DCOPY/CCOPY/ZCOPY/HCOPY
|
||||
void PUBLIC_API cblas_scopy(const int n,
|
||||
const float* x, const int x_inc,
|
||||
float* y, const int y_inc);
|
||||
void PUBLIC_API cblas_dcopy(const int n,
|
||||
const double* x, const int x_inc,
|
||||
double* y, const int y_inc);
|
||||
void PUBLIC_API cblas_ccopy(const int n,
|
||||
const void* x, const int x_inc,
|
||||
void* y, const int y_inc);
|
||||
void PUBLIC_API cblas_zcopy(const int n,
|
||||
const void* x, const int x_inc,
|
||||
void* y, const int y_inc);
|
||||
|
||||
// Vector-times-constant plus vector: SAXPY/DAXPY/CAXPY/ZAXPY/HAXPY
|
||||
void PUBLIC_API cblas_saxpy(const int n,
|
||||
const float alpha,
|
||||
const float* x, const int x_inc,
|
||||
float* y, const int y_inc);
|
||||
void PUBLIC_API cblas_daxpy(const int n,
|
||||
const double alpha,
|
||||
const double* x, const int x_inc,
|
||||
double* y, const int y_inc);
|
||||
void PUBLIC_API cblas_caxpy(const int n,
|
||||
const void* alpha,
|
||||
const void* x, const int x_inc,
|
||||
void* y, const int y_inc);
|
||||
void PUBLIC_API cblas_zaxpy(const int n,
|
||||
const void* alpha,
|
||||
const void* x, const int x_inc,
|
||||
void* y, const int y_inc);
|
||||
|
||||
// Dot product of two vectors: SDOT/DDOT/HDOT
|
||||
float PUBLIC_API cblas_sdot(const int n,
|
||||
const float* x, const int x_inc,
|
||||
const float* y, const int y_inc);
|
||||
double PUBLIC_API cblas_ddot(const int n,
|
||||
const double* x, const int x_inc,
|
||||
const double* y, const int y_inc);
|
||||
|
||||
// Dot product of two complex vectors: CDOTU/ZDOTU
|
||||
void PUBLIC_API cblas_cdotu_sub(const int n,
|
||||
const void* x, const int x_inc,
|
||||
const void* y, const int y_inc,
|
||||
void* dot);
|
||||
void PUBLIC_API cblas_zdotu_sub(const int n,
|
||||
const void* x, const int x_inc,
|
||||
const void* y, const int y_inc,
|
||||
void* dot);
|
||||
|
||||
// Dot product of two complex vectors, one conjugated: CDOTC/ZDOTC
|
||||
void PUBLIC_API cblas_cdotc_sub(const int n,
|
||||
const void* x, const int x_inc,
|
||||
const void* y, const int y_inc,
|
||||
void* dot);
|
||||
void PUBLIC_API cblas_zdotc_sub(const int n,
|
||||
const void* x, const int x_inc,
|
||||
const void* y, const int y_inc,
|
||||
void* dot);
|
||||
|
||||
// Euclidian norm of a vector: SNRM2/DNRM2/ScNRM2/DzNRM2/HNRM2
|
||||
float PUBLIC_API cblas_snrm2(const int n,
|
||||
const float* x, const int x_inc);
|
||||
double PUBLIC_API cblas_dnrm2(const int n,
|
||||
const double* x, const int x_inc);
|
||||
float PUBLIC_API cblas_scnrm2(const int n,
|
||||
const void* x, const int x_inc);
|
||||
double PUBLIC_API cblas_dznrm2(const int n,
|
||||
const void* x, const int x_inc);
|
||||
|
||||
// Absolute sum of values in a vector: SASUM/DASUM/ScASUM/DzASUM/HASUM
|
||||
float PUBLIC_API cblas_sasum(const int n,
|
||||
const float* x, const int x_inc);
|
||||
double PUBLIC_API cblas_dasum(const int n,
|
||||
const double* x, const int x_inc);
|
||||
float PUBLIC_API cblas_scasum(const int n,
|
||||
const void* x, const int x_inc);
|
||||
double PUBLIC_API cblas_dzasum(const int n,
|
||||
const void* x, const int x_inc);
|
||||
|
||||
// Sum of values in a vector (non-BLAS function): SSUM/DSUM/ScSUM/DzSUM/HSUM
|
||||
float PUBLIC_API cblas_ssum(const int n,
|
||||
const float* x, const int x_inc);
|
||||
double PUBLIC_API cblas_dsum(const int n,
|
||||
const double* x, const int x_inc);
|
||||
float PUBLIC_API cblas_scsum(const int n,
|
||||
const void* x, const int x_inc);
|
||||
double PUBLIC_API cblas_dzsum(const int n,
|
||||
const void* x, const int x_inc);
|
||||
|
||||
// Index of absolute maximum value in a vector: iSAMAX/iDAMAX/iCAMAX/iZAMAX/iHAMAX
|
||||
int PUBLIC_API cblas_isamax(const int n,
|
||||
const float* x, const int x_inc);
|
||||
int PUBLIC_API cblas_idamax(const int n,
|
||||
const double* x, const int x_inc);
|
||||
int PUBLIC_API cblas_icamax(const int n,
|
||||
const void* x, const int x_inc);
|
||||
int PUBLIC_API cblas_izamax(const int n,
|
||||
const void* x, const int x_inc);
|
||||
|
||||
// Index of absolute minimum value in a vector (non-BLAS function): iSAMIN/iDAMIN/iCAMIN/iZAMIN/iHAMIN
|
||||
int PUBLIC_API cblas_isamin(const int n,
|
||||
const float* x, const int x_inc);
|
||||
int PUBLIC_API cblas_idamin(const int n,
|
||||
const double* x, const int x_inc);
|
||||
int PUBLIC_API cblas_icamin(const int n,
|
||||
const void* x, const int x_inc);
|
||||
int PUBLIC_API cblas_izamin(const int n,
|
||||
const void* x, const int x_inc);
|
||||
|
||||
// Index of maximum value in a vector (non-BLAS function): iSMAX/iDMAX/iCMAX/iZMAX/iHMAX
|
||||
int PUBLIC_API cblas_ismax(const int n,
|
||||
const float* x, const int x_inc);
|
||||
int PUBLIC_API cblas_idmax(const int n,
|
||||
const double* x, const int x_inc);
|
||||
int PUBLIC_API cblas_icmax(const int n,
|
||||
const void* x, const int x_inc);
|
||||
int PUBLIC_API cblas_izmax(const int n,
|
||||
const void* x, const int x_inc);
|
||||
|
||||
// Index of minimum value in a vector (non-BLAS function): iSMIN/iDMIN/iCMIN/iZMIN/iHMIN
|
||||
int PUBLIC_API cblas_ismin(const int n,
|
||||
const float* x, const int x_inc);
|
||||
int PUBLIC_API cblas_idmin(const int n,
|
||||
const double* x, const int x_inc);
|
||||
int PUBLIC_API cblas_icmin(const int n,
|
||||
const void* x, const int x_inc);
|
||||
int PUBLIC_API cblas_izmin(const int n,
|
||||
const void* x, const int x_inc);
|
||||
|
||||
// =================================================================================================
|
||||
// BLAS level-2 (matrix-vector) routines
|
||||
// =================================================================================================
|
||||
|
||||
// General matrix-vector multiplication: SGEMV/DGEMV/CGEMV/ZGEMV/HGEMV
|
||||
void PUBLIC_API cblas_sgemv(const CLBlastLayout layout, const CLBlastTranspose a_transpose,
|
||||
const int m, const int n,
|
||||
const float alpha,
|
||||
const float* a, const int a_ld,
|
||||
const float* x, const int x_inc,
|
||||
const float beta,
|
||||
float* y, const int y_inc);
|
||||
void PUBLIC_API cblas_dgemv(const CLBlastLayout layout, const CLBlastTranspose a_transpose,
|
||||
const int m, const int n,
|
||||
const double alpha,
|
||||
const double* a, const int a_ld,
|
||||
const double* x, const int x_inc,
|
||||
const double beta,
|
||||
double* y, const int y_inc);
|
||||
void PUBLIC_API cblas_cgemv(const CLBlastLayout layout, const CLBlastTranspose a_transpose,
|
||||
const int m, const int n,
|
||||
const void* alpha,
|
||||
const void* a, const int a_ld,
|
||||
const void* x, const int x_inc,
|
||||
const void* beta,
|
||||
void* y, const int y_inc);
|
||||
void PUBLIC_API cblas_zgemv(const CLBlastLayout layout, const CLBlastTranspose a_transpose,
|
||||
const int m, const int n,
|
||||
const void* alpha,
|
||||
const void* a, const int a_ld,
|
||||
const void* x, const int x_inc,
|
||||
const void* beta,
|
||||
void* y, const int y_inc);
|
||||
|
||||
// General banded matrix-vector multiplication: SGBMV/DGBMV/CGBMV/ZGBMV/HGBMV
|
||||
void PUBLIC_API cblas_sgbmv(const CLBlastLayout layout, const CLBlastTranspose a_transpose,
|
||||
const int m, const int n, const int kl, const int ku,
|
||||
const float alpha,
|
||||
const float* a, const int a_ld,
|
||||
const float* x, const int x_inc,
|
||||
const float beta,
|
||||
float* y, const int y_inc);
|
||||
void PUBLIC_API cblas_dgbmv(const CLBlastLayout layout, const CLBlastTranspose a_transpose,
|
||||
const int m, const int n, const int kl, const int ku,
|
||||
const double alpha,
|
||||
const double* a, const int a_ld,
|
||||
const double* x, const int x_inc,
|
||||
const double beta,
|
||||
double* y, const int y_inc);
|
||||
void PUBLIC_API cblas_cgbmv(const CLBlastLayout layout, const CLBlastTranspose a_transpose,
|
||||
const int m, const int n, const int kl, const int ku,
|
||||
const void* alpha,
|
||||
const void* a, const int a_ld,
|
||||
const void* x, const int x_inc,
|
||||
const void* beta,
|
||||
void* y, const int y_inc);
|
||||
void PUBLIC_API cblas_zgbmv(const CLBlastLayout layout, const CLBlastTranspose a_transpose,
|
||||
const int m, const int n, const int kl, const int ku,
|
||||
const void* alpha,
|
||||
const void* a, const int a_ld,
|
||||
const void* x, const int x_inc,
|
||||
const void* beta,
|
||||
void* y, const int y_inc);
|
||||
|
||||
// Hermitian matrix-vector multiplication: CHEMV/ZHEMV
|
||||
void PUBLIC_API cblas_chemv(const CLBlastLayout layout, const CLBlastTriangle triangle,
|
||||
const int n,
|
||||
const void* alpha,
|
||||
const void* a, const int a_ld,
|
||||
const void* x, const int x_inc,
|
||||
const void* beta,
|
||||
void* y, const int y_inc);
|
||||
void PUBLIC_API cblas_zhemv(const CLBlastLayout layout, const CLBlastTriangle triangle,
|
||||
const int n,
|
||||
const void* alpha,
|
||||
const void* a, const int a_ld,
|
||||
const void* x, const int x_inc,
|
||||
const void* beta,
|
||||
void* y, const int y_inc);
|
||||
|
||||
// Hermitian banded matrix-vector multiplication: CHBMV/ZHBMV
|
||||
void PUBLIC_API cblas_chbmv(const CLBlastLayout layout, const CLBlastTriangle triangle,
|
||||
const int n, const int k,
|
||||
const void* alpha,
|
||||
const void* a, const int a_ld,
|
||||
const void* x, const int x_inc,
|
||||
const void* beta,
|
||||
void* y, const int y_inc);
|
||||
void PUBLIC_API cblas_zhbmv(const CLBlastLayout layout, const CLBlastTriangle triangle,
|
||||
const int n, const int k,
|
||||
const void* alpha,
|
||||
const void* a, const int a_ld,
|
||||
const void* x, const int x_inc,
|
||||
const void* beta,
|
||||
void* y, const int y_inc);
|
||||
|
||||
// Hermitian packed matrix-vector multiplication: CHPMV/ZHPMV
|
||||
void PUBLIC_API cblas_chpmv(const CLBlastLayout layout, const CLBlastTriangle triangle,
|
||||
const int n,
|
||||
const void* alpha,
|
||||
const void* ap,
|
||||
const void* x, const int x_inc,
|
||||
const void* beta,
|
||||
void* y, const int y_inc);
|
||||
void PUBLIC_API cblas_zhpmv(const CLBlastLayout layout, const CLBlastTriangle triangle,
|
||||
const int n,
|
||||
const void* alpha,
|
||||
const void* ap,
|
||||
const void* x, const int x_inc,
|
||||
const void* beta,
|
||||
void* y, const int y_inc);
|
||||
|
||||
// Symmetric matrix-vector multiplication: SSYMV/DSYMV/HSYMV
|
||||
void PUBLIC_API cblas_ssymv(const CLBlastLayout layout, const CLBlastTriangle triangle,
|
||||
const int n,
|
||||
const float alpha,
|
||||
const float* a, const int a_ld,
|
||||
const float* x, const int x_inc,
|
||||
const float beta,
|
||||
float* y, const int y_inc);
|
||||
void PUBLIC_API cblas_dsymv(const CLBlastLayout layout, const CLBlastTriangle triangle,
|
||||
const int n,
|
||||
const double alpha,
|
||||
const double* a, const int a_ld,
|
||||
const double* x, const int x_inc,
|
||||
const double beta,
|
||||
double* y, const int y_inc);
|
||||
|
||||
// Symmetric banded matrix-vector multiplication: SSBMV/DSBMV/HSBMV
|
||||
void PUBLIC_API cblas_ssbmv(const CLBlastLayout layout, const CLBlastTriangle triangle,
|
||||
const int n, const int k,
|
||||
const float alpha,
|
||||
const float* a, const int a_ld,
|
||||
const float* x, const int x_inc,
|
||||
const float beta,
|
||||
float* y, const int y_inc);
|
||||
void PUBLIC_API cblas_dsbmv(const CLBlastLayout layout, const CLBlastTriangle triangle,
|
||||
const int n, const int k,
|
||||
const double alpha,
|
||||
const double* a, const int a_ld,
|
||||
const double* x, const int x_inc,
|
||||
const double beta,
|
||||
double* y, const int y_inc);
|
||||
|
||||
// Symmetric packed matrix-vector multiplication: SSPMV/DSPMV/HSPMV
|
||||
void PUBLIC_API cblas_sspmv(const CLBlastLayout layout, const CLBlastTriangle triangle,
|
||||
const int n,
|
||||
const float alpha,
|
||||
const float* ap,
|
||||
const float* x, const int x_inc,
|
||||
const float beta,
|
||||
float* y, const int y_inc);
|
||||
void PUBLIC_API cblas_dspmv(const CLBlastLayout layout, const CLBlastTriangle triangle,
|
||||
const int n,
|
||||
const double alpha,
|
||||
const double* ap,
|
||||
const double* x, const int x_inc,
|
||||
const double beta,
|
||||
double* y, const int y_inc);
|
||||
|
||||
// Triangular matrix-vector multiplication: STRMV/DTRMV/CTRMV/ZTRMV/HTRMV
|
||||
void PUBLIC_API cblas_strmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal,
|
||||
const int n,
|
||||
const float* a, const int a_ld,
|
||||
float* x, const int x_inc);
|
||||
void PUBLIC_API cblas_dtrmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal,
|
||||
const int n,
|
||||
const double* a, const int a_ld,
|
||||
double* x, const int x_inc);
|
||||
void PUBLIC_API cblas_ctrmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal,
|
||||
const int n,
|
||||
const void* a, const int a_ld,
|
||||
void* x, const int x_inc);
|
||||
void PUBLIC_API cblas_ztrmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal,
|
||||
const int n,
|
||||
const void* a, const int a_ld,
|
||||
void* x, const int x_inc);
|
||||
|
||||
// Triangular banded matrix-vector multiplication: STBMV/DTBMV/CTBMV/ZTBMV/HTBMV
|
||||
void PUBLIC_API cblas_stbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal,
|
||||
const int n, const int k,
|
||||
const float* a, const int a_ld,
|
||||
float* x, const int x_inc);
|
||||
void PUBLIC_API cblas_dtbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal,
|
||||
const int n, const int k,
|
||||
const double* a, const int a_ld,
|
||||
double* x, const int x_inc);
|
||||
void PUBLIC_API cblas_ctbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal,
|
||||
const int n, const int k,
|
||||
const void* a, const int a_ld,
|
||||
void* x, const int x_inc);
|
||||
void PUBLIC_API cblas_ztbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal,
|
||||
const int n, const int k,
|
||||
const void* a, const int a_ld,
|
||||
void* x, const int x_inc);
|
||||
|
||||
// Triangular packed matrix-vector multiplication: STPMV/DTPMV/CTPMV/ZTPMV/HTPMV
|
||||
void PUBLIC_API cblas_stpmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal,
|
||||
const int n,
|
||||
const float* ap,
|
||||
float* x, const int x_inc);
|
||||
void PUBLIC_API cblas_dtpmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal,
|
||||
const int n,
|
||||
const double* ap,
|
||||
double* x, const int x_inc);
|
||||
void PUBLIC_API cblas_ctpmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal,
|
||||
const int n,
|
||||
const void* ap,
|
||||
void* x, const int x_inc);
|
||||
void PUBLIC_API cblas_ztpmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal,
|
||||
const int n,
|
||||
const void* ap,
|
||||
void* x, const int x_inc);
|
||||
|
||||
// Solves a triangular system of equations: STRSV/DTRSV/CTRSV/ZTRSV
|
||||
void PUBLIC_API cblas_strsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal,
|
||||
const int n,
|
||||
const float* a, const int a_ld,
|
||||
float* x, const int x_inc);
|
||||
void PUBLIC_API cblas_dtrsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal,
|
||||
const int n,
|
||||
const double* a, const int a_ld,
|
||||
double* x, const int x_inc);
|
||||
void PUBLIC_API cblas_ctrsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal,
|
||||
const int n,
|
||||
const void* a, const int a_ld,
|
||||
void* x, const int x_inc);
|
||||
void PUBLIC_API cblas_ztrsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal,
|
||||
const int n,
|
||||
const void* a, const int a_ld,
|
||||
void* x, const int x_inc);
|
||||
|
||||
// Solves a banded triangular system of equations: STBSV/DTBSV/CTBSV/ZTBSV
|
||||
void PUBLIC_API cblas_stbsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal,
|
||||
const int n, const int k,
|
||||
const float* a, const int a_ld,
|
||||
float* x, const int x_inc);
|
||||
void PUBLIC_API cblas_dtbsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal,
|
||||
const int n, const int k,
|
||||
const double* a, const int a_ld,
|
||||
double* x, const int x_inc);
|
||||
void PUBLIC_API cblas_ctbsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal,
|
||||
const int n, const int k,
|
||||
const void* a, const int a_ld,
|
||||
void* x, const int x_inc);
|
||||
void PUBLIC_API cblas_ztbsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal,
|
||||
const int n, const int k,
|
||||
const void* a, const int a_ld,
|
||||
void* x, const int x_inc);
|
||||
|
||||
// Solves a packed triangular system of equations: STPSV/DTPSV/CTPSV/ZTPSV
|
||||
void PUBLIC_API cblas_stpsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal,
|
||||
const int n,
|
||||
const float* ap,
|
||||
float* x, const int x_inc);
|
||||
void PUBLIC_API cblas_dtpsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal,
|
||||
const int n,
|
||||
const double* ap,
|
||||
double* x, const int x_inc);
|
||||
void PUBLIC_API cblas_ctpsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal,
|
||||
const int n,
|
||||
const void* ap,
|
||||
void* x, const int x_inc);
|
||||
void PUBLIC_API cblas_ztpsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal,
|
||||
const int n,
|
||||
const void* ap,
|
||||
void* x, const int x_inc);
|
||||
|
||||
// General rank-1 matrix update: SGER/DGER/HGER
|
||||
void PUBLIC_API cblas_sger(const CLBlastLayout layout,
|
||||
const int m, const int n,
|
||||
const float alpha,
|
||||
const float* x, const int x_inc,
|
||||
const float* y, const int y_inc,
|
||||
float* a, const int a_ld);
|
||||
void PUBLIC_API cblas_dger(const CLBlastLayout layout,
|
||||
const int m, const int n,
|
||||
const double alpha,
|
||||
const double* x, const int x_inc,
|
||||
const double* y, const int y_inc,
|
||||
double* a, const int a_ld);
|
||||
|
||||
// General rank-1 complex matrix update: CGERU/ZGERU
|
||||
void PUBLIC_API cblas_cgeru(const CLBlastLayout layout,
|
||||
const int m, const int n,
|
||||
const void* alpha,
|
||||
const void* x, const int x_inc,
|
||||
const void* y, const int y_inc,
|
||||
void* a, const int a_ld);
|
||||
void PUBLIC_API cblas_zgeru(const CLBlastLayout layout,
|
||||
const int m, const int n,
|
||||
const void* alpha,
|
||||
const void* x, const int x_inc,
|
||||
const void* y, const int y_inc,
|
||||
void* a, const int a_ld);
|
||||
|
||||
// General rank-1 complex conjugated matrix update: CGERC/ZGERC
|
||||
void PUBLIC_API cblas_cgerc(const CLBlastLayout layout,
|
||||
const int m, const int n,
|
||||
const void* alpha,
|
||||
const void* x, const int x_inc,
|
||||
const void* y, const int y_inc,
|
||||
void* a, const int a_ld);
|
||||
void PUBLIC_API cblas_zgerc(const CLBlastLayout layout,
|
||||
const int m, const int n,
|
||||
const void* alpha,
|
||||
const void* x, const int x_inc,
|
||||
const void* y, const int y_inc,
|
||||
void* a, const int a_ld);
|
||||
|
||||
// Hermitian rank-1 matrix update: CHER/ZHER
|
||||
void PUBLIC_API cblas_cher(const CLBlastLayout layout, const CLBlastTriangle triangle,
|
||||
const int n,
|
||||
const float alpha,
|
||||
const void* x, const int x_inc,
|
||||
void* a, const int a_ld);
|
||||
void PUBLIC_API cblas_zher(const CLBlastLayout layout, const CLBlastTriangle triangle,
|
||||
const int n,
|
||||
const double alpha,
|
||||
const void* x, const int x_inc,
|
||||
void* a, const int a_ld);
|
||||
|
||||
// Hermitian packed rank-1 matrix update: CHPR/ZHPR
|
||||
void PUBLIC_API cblas_chpr(const CLBlastLayout layout, const CLBlastTriangle triangle,
|
||||
const int n,
|
||||
const float alpha,
|
||||
const void* x, const int x_inc,
|
||||
void* ap);
|
||||
void PUBLIC_API cblas_zhpr(const CLBlastLayout layout, const CLBlastTriangle triangle,
|
||||
const int n,
|
||||
const double alpha,
|
||||
const void* x, const int x_inc,
|
||||
void* ap);
|
||||
|
||||
// Hermitian rank-2 matrix update: CHER2/ZHER2
|
||||
void PUBLIC_API cblas_cher2(const CLBlastLayout layout, const CLBlastTriangle triangle,
|
||||
const int n,
|
||||
const void* alpha,
|
||||
const void* x, const int x_inc,
|
||||
const void* y, const int y_inc,
|
||||
void* a, const int a_ld);
|
||||
void PUBLIC_API cblas_zher2(const CLBlastLayout layout, const CLBlastTriangle triangle,
|
||||
const int n,
|
||||
const void* alpha,
|
||||
const void* x, const int x_inc,
|
||||
const void* y, const int y_inc,
|
||||
void* a, const int a_ld);
|
||||
|
||||
// Hermitian packed rank-2 matrix update: CHPR2/ZHPR2
|
||||
void PUBLIC_API cblas_chpr2(const CLBlastLayout layout, const CLBlastTriangle triangle,
|
||||
const int n,
|
||||
const void* alpha,
|
||||
const void* x, const int x_inc,
|
||||
const void* y, const int y_inc,
|
||||
void* ap);
|
||||
void PUBLIC_API cblas_zhpr2(const CLBlastLayout layout, const CLBlastTriangle triangle,
|
||||
const int n,
|
||||
const void* alpha,
|
||||
const void* x, const int x_inc,
|
||||
const void* y, const int y_inc,
|
||||
void* ap);
|
||||
|
||||
// Symmetric rank-1 matrix update: SSYR/DSYR/HSYR
|
||||
void PUBLIC_API cblas_ssyr(const CLBlastLayout layout, const CLBlastTriangle triangle,
|
||||
const int n,
|
||||
const float alpha,
|
||||
const float* x, const int x_inc,
|
||||
float* a, const int a_ld);
|
||||
void PUBLIC_API cblas_dsyr(const CLBlastLayout layout, const CLBlastTriangle triangle,
|
||||
const int n,
|
||||
const double alpha,
|
||||
const double* x, const int x_inc,
|
||||
double* a, const int a_ld);
|
||||
|
||||
// Symmetric packed rank-1 matrix update: SSPR/DSPR/HSPR
|
||||
void PUBLIC_API cblas_sspr(const CLBlastLayout layout, const CLBlastTriangle triangle,
|
||||
const int n,
|
||||
const float alpha,
|
||||
const float* x, const int x_inc,
|
||||
float* ap);
|
||||
void PUBLIC_API cblas_dspr(const CLBlastLayout layout, const CLBlastTriangle triangle,
|
||||
const int n,
|
||||
const double alpha,
|
||||
const double* x, const int x_inc,
|
||||
double* ap);
|
||||
|
||||
// Symmetric rank-2 matrix update: SSYR2/DSYR2/HSYR2
|
||||
void PUBLIC_API cblas_ssyr2(const CLBlastLayout layout, const CLBlastTriangle triangle,
|
||||
const int n,
|
||||
const float alpha,
|
||||
const float* x, const int x_inc,
|
||||
const float* y, const int y_inc,
|
||||
float* a, const int a_ld);
|
||||
void PUBLIC_API cblas_dsyr2(const CLBlastLayout layout, const CLBlastTriangle triangle,
|
||||
const int n,
|
||||
const double alpha,
|
||||
const double* x, const int x_inc,
|
||||
const double* y, const int y_inc,
|
||||
double* a, const int a_ld);
|
||||
|
||||
// Symmetric packed rank-2 matrix update: SSPR2/DSPR2/HSPR2
|
||||
void PUBLIC_API cblas_sspr2(const CLBlastLayout layout, const CLBlastTriangle triangle,
|
||||
const int n,
|
||||
const float alpha,
|
||||
const float* x, const int x_inc,
|
||||
const float* y, const int y_inc,
|
||||
float* ap);
|
||||
void PUBLIC_API cblas_dspr2(const CLBlastLayout layout, const CLBlastTriangle triangle,
|
||||
const int n,
|
||||
const double alpha,
|
||||
const double* x, const int x_inc,
|
||||
const double* y, const int y_inc,
|
||||
double* ap);
|
||||
|
||||
// =================================================================================================
|
||||
// BLAS level-3 (matrix-matrix) routines
|
||||
// =================================================================================================
|
||||
|
||||
// General matrix-matrix multiplication: SGEMM/DGEMM/CGEMM/ZGEMM/HGEMM
|
||||
void PUBLIC_API cblas_sgemm(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose,
|
||||
const int m, const int n, const int k,
|
||||
const float alpha,
|
||||
const float* a, const int a_ld,
|
||||
const float* b, const int b_ld,
|
||||
const float beta,
|
||||
float* c, const int c_ld);
|
||||
void PUBLIC_API cblas_dgemm(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose,
|
||||
const int m, const int n, const int k,
|
||||
const double alpha,
|
||||
const double* a, const int a_ld,
|
||||
const double* b, const int b_ld,
|
||||
const double beta,
|
||||
double* c, const int c_ld);
|
||||
void PUBLIC_API cblas_cgemm(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose,
|
||||
const int m, const int n, const int k,
|
||||
const void* alpha,
|
||||
const void* a, const int a_ld,
|
||||
const void* b, const int b_ld,
|
||||
const void* beta,
|
||||
void* c, const int c_ld);
|
||||
void PUBLIC_API cblas_zgemm(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose,
|
||||
const int m, const int n, const int k,
|
||||
const void* alpha,
|
||||
const void* a, const int a_ld,
|
||||
const void* b, const int b_ld,
|
||||
const void* beta,
|
||||
void* c, const int c_ld);
|
||||
|
||||
// Symmetric matrix-matrix multiplication: SSYMM/DSYMM/CSYMM/ZSYMM/HSYMM
|
||||
void PUBLIC_API cblas_ssymm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle,
|
||||
const int m, const int n,
|
||||
const float alpha,
|
||||
const float* a, const int a_ld,
|
||||
const float* b, const int b_ld,
|
||||
const float beta,
|
||||
float* c, const int c_ld);
|
||||
void PUBLIC_API cblas_dsymm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle,
|
||||
const int m, const int n,
|
||||
const double alpha,
|
||||
const double* a, const int a_ld,
|
||||
const double* b, const int b_ld,
|
||||
const double beta,
|
||||
double* c, const int c_ld);
|
||||
void PUBLIC_API cblas_csymm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle,
|
||||
const int m, const int n,
|
||||
const void* alpha,
|
||||
const void* a, const int a_ld,
|
||||
const void* b, const int b_ld,
|
||||
const void* beta,
|
||||
void* c, const int c_ld);
|
||||
void PUBLIC_API cblas_zsymm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle,
|
||||
const int m, const int n,
|
||||
const void* alpha,
|
||||
const void* a, const int a_ld,
|
||||
const void* b, const int b_ld,
|
||||
const void* beta,
|
||||
void* c, const int c_ld);
|
||||
|
||||
// Hermitian matrix-matrix multiplication: CHEMM/ZHEMM
|
||||
void PUBLIC_API cblas_chemm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle,
|
||||
const int m, const int n,
|
||||
const void* alpha,
|
||||
const void* a, const int a_ld,
|
||||
const void* b, const int b_ld,
|
||||
const void* beta,
|
||||
void* c, const int c_ld);
|
||||
void PUBLIC_API cblas_zhemm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle,
|
||||
const int m, const int n,
|
||||
const void* alpha,
|
||||
const void* a, const int a_ld,
|
||||
const void* b, const int b_ld,
|
||||
const void* beta,
|
||||
void* c, const int c_ld);
|
||||
|
||||
// Rank-K update of a symmetric matrix: SSYRK/DSYRK/CSYRK/ZSYRK/HSYRK
|
||||
void PUBLIC_API cblas_ssyrk(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose,
|
||||
const int n, const int k,
|
||||
const float alpha,
|
||||
const float* a, const int a_ld,
|
||||
const float beta,
|
||||
float* c, const int c_ld);
|
||||
void PUBLIC_API cblas_dsyrk(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose,
|
||||
const int n, const int k,
|
||||
const double alpha,
|
||||
const double* a, const int a_ld,
|
||||
const double beta,
|
||||
double* c, const int c_ld);
|
||||
void PUBLIC_API cblas_csyrk(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose,
|
||||
const int n, const int k,
|
||||
const void* alpha,
|
||||
const void* a, const int a_ld,
|
||||
const void* beta,
|
||||
void* c, const int c_ld);
|
||||
void PUBLIC_API cblas_zsyrk(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose,
|
||||
const int n, const int k,
|
||||
const void* alpha,
|
||||
const void* a, const int a_ld,
|
||||
const void* beta,
|
||||
void* c, const int c_ld);
|
||||
|
||||
// Rank-K update of a hermitian matrix: CHERK/ZHERK
|
||||
void PUBLIC_API cblas_cherk(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose,
|
||||
const int n, const int k,
|
||||
const float alpha,
|
||||
const void* a, const int a_ld,
|
||||
const float beta,
|
||||
void* c, const int c_ld);
|
||||
void PUBLIC_API cblas_zherk(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose,
|
||||
const int n, const int k,
|
||||
const double alpha,
|
||||
const void* a, const int a_ld,
|
||||
const double beta,
|
||||
void* c, const int c_ld);
|
||||
|
||||
// Rank-2K update of a symmetric matrix: SSYR2K/DSYR2K/CSYR2K/ZSYR2K/HSYR2K
|
||||
void PUBLIC_API cblas_ssyr2k(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose ab_transpose,
|
||||
const int n, const int k,
|
||||
const float alpha,
|
||||
const float* a, const int a_ld,
|
||||
const float* b, const int b_ld,
|
||||
const float beta,
|
||||
float* c, const int c_ld);
|
||||
void PUBLIC_API cblas_dsyr2k(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose ab_transpose,
|
||||
const int n, const int k,
|
||||
const double alpha,
|
||||
const double* a, const int a_ld,
|
||||
const double* b, const int b_ld,
|
||||
const double beta,
|
||||
double* c, const int c_ld);
|
||||
void PUBLIC_API cblas_csyr2k(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose ab_transpose,
|
||||
const int n, const int k,
|
||||
const void* alpha,
|
||||
const void* a, const int a_ld,
|
||||
const void* b, const int b_ld,
|
||||
const void* beta,
|
||||
void* c, const int c_ld);
|
||||
void PUBLIC_API cblas_zsyr2k(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose ab_transpose,
|
||||
const int n, const int k,
|
||||
const void* alpha,
|
||||
const void* a, const int a_ld,
|
||||
const void* b, const int b_ld,
|
||||
const void* beta,
|
||||
void* c, const int c_ld);
|
||||
|
||||
// Rank-2K update of a hermitian matrix: CHER2K/ZHER2K
|
||||
void PUBLIC_API cblas_cher2k(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose ab_transpose,
|
||||
const int n, const int k,
|
||||
const void* alpha,
|
||||
const void* a, const int a_ld,
|
||||
const void* b, const int b_ld,
|
||||
const float beta,
|
||||
void* c, const int c_ld);
|
||||
void PUBLIC_API cblas_zher2k(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose ab_transpose,
|
||||
const int n, const int k,
|
||||
const void* alpha,
|
||||
const void* a, const int a_ld,
|
||||
const void* b, const int b_ld,
|
||||
const double beta,
|
||||
void* c, const int c_ld);
|
||||
|
||||
// Triangular matrix-matrix multiplication: STRMM/DTRMM/CTRMM/ZTRMM/HTRMM
|
||||
void PUBLIC_API cblas_strmm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal,
|
||||
const int m, const int n,
|
||||
const float alpha,
|
||||
const float* a, const int a_ld,
|
||||
float* b, const int b_ld);
|
||||
void PUBLIC_API cblas_dtrmm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal,
|
||||
const int m, const int n,
|
||||
const double alpha,
|
||||
const double* a, const int a_ld,
|
||||
double* b, const int b_ld);
|
||||
void PUBLIC_API cblas_ctrmm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal,
|
||||
const int m, const int n,
|
||||
const void* alpha,
|
||||
const void* a, const int a_ld,
|
||||
void* b, const int b_ld);
|
||||
void PUBLIC_API cblas_ztrmm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal,
|
||||
const int m, const int n,
|
||||
const void* alpha,
|
||||
const void* a, const int a_ld,
|
||||
void* b, const int b_ld);
|
||||
|
||||
// Solves a triangular system of equations: STRSM/DTRSM/CTRSM/ZTRSM
|
||||
void PUBLIC_API cblas_strsm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal,
|
||||
const int m, const int n,
|
||||
const float alpha,
|
||||
const float* a, const int a_ld,
|
||||
float* b, const int b_ld);
|
||||
void PUBLIC_API cblas_dtrsm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal,
|
||||
const int m, const int n,
|
||||
const double alpha,
|
||||
const double* a, const int a_ld,
|
||||
double* b, const int b_ld);
|
||||
void PUBLIC_API cblas_ctrsm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal,
|
||||
const int m, const int n,
|
||||
const void* alpha,
|
||||
const void* a, const int a_ld,
|
||||
void* b, const int b_ld);
|
||||
void PUBLIC_API cblas_ztrsm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal,
|
||||
const int m, const int n,
|
||||
const void* alpha,
|
||||
const void* a, const int a_ld,
|
||||
void* b, const int b_ld);
|
||||
|
||||
// =================================================================================================
|
||||
// Extra non-BLAS routines (level-X)
|
||||
// =================================================================================================
|
||||
|
||||
// Element-wise vector product (Hadamard): SHAD/DHAD/CHAD/ZHAD/HHAD
|
||||
void PUBLIC_API cblas_shad(const int n,
|
||||
const float alpha,
|
||||
const float* x, const int x_inc,
|
||||
const float* y, const int y_inc,
|
||||
const float beta,
|
||||
float* z, const int z_inc);
|
||||
void PUBLIC_API cblas_dhad(const int n,
|
||||
const double alpha,
|
||||
const double* x, const int x_inc,
|
||||
const double* y, const int y_inc,
|
||||
const double beta,
|
||||
double* z, const int z_inc);
|
||||
void PUBLIC_API cblas_chad(const int n,
|
||||
const void* alpha,
|
||||
const void* x, const int x_inc,
|
||||
const void* y, const int y_inc,
|
||||
const void* beta,
|
||||
void* z, const int z_inc);
|
||||
void PUBLIC_API cblas_zhad(const int n,
|
||||
const void* alpha,
|
||||
const void* x, const int x_inc,
|
||||
const void* y, const int y_inc,
|
||||
const void* beta,
|
||||
void* z, const int z_inc);
|
||||
|
||||
// Scaling and out-place transpose/copy (non-BLAS function): SOMATCOPY/DOMATCOPY/COMATCOPY/ZOMATCOPY/HOMATCOPY
|
||||
void PUBLIC_API cblas_somatcopy(const CLBlastLayout layout, const CLBlastTranspose a_transpose,
|
||||
const int m, const int n,
|
||||
const float alpha,
|
||||
const float* a, const int a_ld,
|
||||
float* b, const int b_ld);
|
||||
void PUBLIC_API cblas_domatcopy(const CLBlastLayout layout, const CLBlastTranspose a_transpose,
|
||||
const int m, const int n,
|
||||
const double alpha,
|
||||
const double* a, const int a_ld,
|
||||
double* b, const int b_ld);
|
||||
void PUBLIC_API cblas_comatcopy(const CLBlastLayout layout, const CLBlastTranspose a_transpose,
|
||||
const int m, const int n,
|
||||
const void* alpha,
|
||||
const void* a, const int a_ld,
|
||||
void* b, const int b_ld);
|
||||
void PUBLIC_API cblas_zomatcopy(const CLBlastLayout layout, const CLBlastTranspose a_transpose,
|
||||
const int m, const int n,
|
||||
const void* alpha,
|
||||
const void* a, const int a_ld,
|
||||
void* b, const int b_ld);
|
||||
|
||||
// Im2col function (non-BLAS function): SIM2COL/DIM2COL/CIM2COL/ZIM2COL/HIM2COL
|
||||
void PUBLIC_API cblas_sim2col(const CLBlastKernelMode kernel_mode,
|
||||
const int channels, const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w,
|
||||
const float* im,
|
||||
float* col);
|
||||
void PUBLIC_API cblas_dim2col(const CLBlastKernelMode kernel_mode,
|
||||
const int channels, const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w,
|
||||
const double* im,
|
||||
double* col);
|
||||
void PUBLIC_API cblas_cim2col(const CLBlastKernelMode kernel_mode,
|
||||
const int channels, const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w,
|
||||
const void* im,
|
||||
void* col);
|
||||
void PUBLIC_API cblas_zim2col(const CLBlastKernelMode kernel_mode,
|
||||
const int channels, const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w,
|
||||
const void* im,
|
||||
void* col);
|
||||
|
||||
// Col2im function (non-BLAS function): SCOL2IM/DCOL2IM/CCOL2IM/ZCOL2IM/HCOL2IM
|
||||
void PUBLIC_API cblas_scol2im(const CLBlastKernelMode kernel_mode,
|
||||
const int channels, const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w,
|
||||
const float* col,
|
||||
float* im);
|
||||
void PUBLIC_API cblas_dcol2im(const CLBlastKernelMode kernel_mode,
|
||||
const int channels, const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w,
|
||||
const double* col,
|
||||
double* im);
|
||||
void PUBLIC_API cblas_ccol2im(const CLBlastKernelMode kernel_mode,
|
||||
const int channels, const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w,
|
||||
const void* col,
|
||||
void* im);
|
||||
void PUBLIC_API cblas_zcol2im(const CLBlastKernelMode kernel_mode,
|
||||
const int channels, const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w,
|
||||
const void* col,
|
||||
void* im);
|
||||
|
||||
// =================================================================================================
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
// CLBLAST_CLBLAST_NETLIB_C_H_
|
||||
#endif
|
||||
|
|
@ -1,133 +0,0 @@
|
|||
#ifndef OPENBLAS_CONFIG_H
|
||||
#define OPENBLAS_CONFIG_H
|
||||
#define OPENBLAS_OS_WINNT 1
|
||||
#define OPENBLAS_ARCH_X86_64 1
|
||||
#define OPENBLAS_C_GCC 1
|
||||
#define OPENBLAS___64BIT__ 1
|
||||
#define OPENBLAS_HAVE_C11 1
|
||||
#define OPENBLAS_PTHREAD_CREATE_FUNC pthread_create
|
||||
#define OPENBLAS_BUNDERSCORE _
|
||||
#define OPENBLAS_NEEDBUNDERSCORE 1
|
||||
#define OPENBLAS_GENERIC
|
||||
#define OPENBLAS_L1_DATA_SIZE 32768
|
||||
#define OPENBLAS_L1_DATA_LINESIZE 128
|
||||
#define OPENBLAS_L2_SIZE 512488
|
||||
#define OPENBLAS_L2_LINESIZE 128
|
||||
#define OPENBLAS_DTB_DEFAULT_ENTRIES 128
|
||||
#define OPENBLAS_DTB_SIZE 4096
|
||||
#define OPENBLAS_L2_ASSOCIATIVE 8
|
||||
#define OPENBLAS_CORE_generic
|
||||
#define OPENBLAS_CHAR_CORENAME "generic"
|
||||
#define OPENBLAS_SLOCAL_BUFFER_SIZE 4096
|
||||
#define OPENBLAS_DLOCAL_BUFFER_SIZE 4096
|
||||
#define OPENBLAS_CLOCAL_BUFFER_SIZE 8192
|
||||
#define OPENBLAS_ZLOCAL_BUFFER_SIZE 8192
|
||||
#define OPENBLAS_GEMM_MULTITHREAD_THRESHOLD 4
|
||||
#define OPENBLAS_VERSION " OpenBLAS 0.3.22 "
|
||||
/*This is only for "make install" target.*/
|
||||
|
||||
#if defined(OPENBLAS_OS_WINNT) || defined(OPENBLAS_OS_CYGWIN_NT) || defined(OPENBLAS_OS_INTERIX)
|
||||
#define OPENBLAS_WINDOWS_ABI
|
||||
#define OPENBLAS_OS_WINDOWS
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define DOUBLE_DEFINED DOUBLE
|
||||
#undef DOUBLE
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef OPENBLAS_NEEDBUNDERSCORE
|
||||
#define BLASFUNC(FUNC) FUNC##_
|
||||
#else
|
||||
#define BLASFUNC(FUNC) FUNC
|
||||
#endif
|
||||
|
||||
#ifdef OPENBLAS_QUAD_PRECISION
|
||||
typedef struct {
|
||||
unsigned long x[2];
|
||||
} xdouble;
|
||||
#elif defined OPENBLAS_EXPRECISION
|
||||
#define xdouble long double
|
||||
#else
|
||||
#define xdouble double
|
||||
#endif
|
||||
|
||||
#if defined(OPENBLAS_OS_WINDOWS) && defined(OPENBLAS___64BIT__)
|
||||
typedef long long BLASLONG;
|
||||
typedef unsigned long long BLASULONG;
|
||||
#else
|
||||
typedef long BLASLONG;
|
||||
typedef unsigned long BLASULONG;
|
||||
#endif
|
||||
|
||||
#ifndef BFLOAT16
|
||||
#include <stdint.h>
|
||||
typedef uint16_t bfloat16;
|
||||
#endif
|
||||
|
||||
#ifdef OPENBLAS_USE64BITINT
|
||||
typedef BLASLONG blasint;
|
||||
#else
|
||||
typedef int blasint;
|
||||
#endif
|
||||
|
||||
#if defined(XDOUBLE) || defined(DOUBLE)
|
||||
#define FLOATRET FLOAT
|
||||
#else
|
||||
#ifdef NEED_F2CCONV
|
||||
#define FLOATRET double
|
||||
#else
|
||||
#define FLOATRET float
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* Inclusion of a standard header file is needed for definition of __STDC_*
|
||||
predefined macros with some compilers (e.g. GCC 4.7 on Linux). This occurs
|
||||
as a side effect of including either <features.h> or <stdc-predef.h>. */
|
||||
#include <stdio.h>
|
||||
|
||||
/* C99 supports complex floating numbers natively, which GCC also offers as an
|
||||
extension since version 3.0. If neither are available, use a compatible
|
||||
structure as fallback (see Clause 6.2.5.13 of the C99 standard). */
|
||||
#if ((defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \
|
||||
(__GNUC__ >= 3 && !defined(__cplusplus))) && !(defined(FORCE_OPENBLAS_COMPLEX_STRUCT))) && !defined(_MSC_VER)
|
||||
#define OPENBLAS_COMPLEX_C99
|
||||
#ifndef __cplusplus
|
||||
#include <complex.h>
|
||||
#endif
|
||||
typedef float _Complex openblas_complex_float;
|
||||
typedef double _Complex openblas_complex_double;
|
||||
typedef xdouble _Complex openblas_complex_xdouble;
|
||||
#define openblas_make_complex_float(real, imag) ((real) + ((imag) * _Complex_I))
|
||||
#define openblas_make_complex_double(real, imag) ((real) + ((imag) * _Complex_I))
|
||||
#define openblas_make_complex_xdouble(real, imag) ((real) + ((imag) * _Complex_I))
|
||||
#define openblas_complex_float_real(z) (creal(z))
|
||||
#define openblas_complex_float_imag(z) (cimag(z))
|
||||
#define openblas_complex_double_real(z) (creal(z))
|
||||
#define openblas_complex_double_imag(z) (cimag(z))
|
||||
#define openblas_complex_xdouble_real(z) (creal(z))
|
||||
#define openblas_complex_xdouble_imag(z) (cimag(z))
|
||||
#else
|
||||
#define OPENBLAS_COMPLEX_STRUCT
|
||||
typedef struct { float real, imag; } openblas_complex_float;
|
||||
typedef struct { double real, imag; } openblas_complex_double;
|
||||
typedef struct { xdouble real, imag; } openblas_complex_xdouble;
|
||||
#define openblas_make_complex_float(real, imag) {(real), (imag)}
|
||||
#define openblas_make_complex_double(real, imag) {(real), (imag)}
|
||||
#define openblas_make_complex_xdouble(real, imag) {(real), (imag)}
|
||||
#define openblas_complex_float_real(z) ((z).real)
|
||||
#define openblas_complex_float_imag(z) ((z).imag)
|
||||
#define openblas_complex_double_real(z) ((z).real)
|
||||
#define openblas_complex_double_imag(z) ((z).imag)
|
||||
#define openblas_complex_xdouble_real(z) ((z).real)
|
||||
#define openblas_complex_xdouble_imag(z) ((z).imag)
|
||||
#endif
|
||||
|
||||
/* Inclusion of Linux-specific header is needed for definition of cpu_set_t. */
|
||||
#ifdef OPENBLAS_OS_LINUX
|
||||
#ifndef _GNU_SOURCE
|
||||
#define _GNU_SOURCE
|
||||
#endif
|
||||
#include <sched.h>
|
||||
#endif
|
||||
#endif /* OPENBLAS_CONFIG_H */
|
||||
BIN
lib/OpenCL.lib
BIN
lib/OpenCL.lib
Binary file not shown.
BIN
lib/clblast.lib
BIN
lib/clblast.lib
Binary file not shown.
|
|
@ -1,427 +0,0 @@
|
|||
#include "ggml_v2-opencl-legacy.h"
|
||||
|
||||
#define CL_TARGET_OPENCL_VERSION 110
|
||||
#include <clblast_c.h>
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "ggml_v2.h"
|
||||
|
||||
#define MULTILINE_QUOTE(...) #__VA_ARGS__
|
||||
const char * clblast_dequant_legacy = MULTILINE_QUOTE(
|
||||
|
||||
struct block_q4_0
|
||||
{
|
||||
float d;
|
||||
uchar qs[16];
|
||||
};
|
||||
|
||||
__kernel void dequantize_row_q4_0(__global struct block_q4_0* blocks, __global float* result) {
|
||||
const uint i = get_global_id(0) / 32;
|
||||
const uint l = get_local_id(0);
|
||||
|
||||
const float d = blocks[i].d;
|
||||
|
||||
const uchar vi = blocks[i].qs[l];
|
||||
|
||||
const uint index = i*32 + l*2;
|
||||
result[index + 0] = ((vi & 0xf) - 8)*d;
|
||||
result[index + 1] = ((vi >> 4) - 8)*d;
|
||||
}
|
||||
|
||||
struct block_q4_1
|
||||
{
|
||||
float d;
|
||||
float m;
|
||||
uchar qs[16];
|
||||
};
|
||||
|
||||
__kernel void dequantize_row_q4_1(__global struct block_q4_1* blocks, __global float* result) {
|
||||
const uint i = get_global_id(0) / 32;
|
||||
const uint l = get_local_id(0);
|
||||
|
||||
const float d = blocks[i].d;
|
||||
const float m = blocks[i].m;
|
||||
|
||||
const uchar vi = blocks[i].qs[l];
|
||||
|
||||
const uint index = i*32 + l*2;
|
||||
result[index + 0] = (vi & 0xf) * d + m;
|
||||
result[index + 1] = (vi >> 4) * d + m;
|
||||
}
|
||||
|
||||
struct block_q4_2
|
||||
{
|
||||
ushort d;
|
||||
uchar qs[8];
|
||||
};
|
||||
|
||||
__kernel void dequantize_row_q4_2(__global struct block_q4_2* blocks, __global float* result) {
|
||||
const uint i = get_global_id(0) / 16;
|
||||
const uint l = get_local_id(0);
|
||||
|
||||
const float d = vload_half(0, (__global half*) &blocks[i].d);
|
||||
|
||||
const uchar vi = blocks[i].qs[l];
|
||||
|
||||
const uint index = i*16 + l*2;
|
||||
result[index + 0] = ((vi & 0xf) - 8)*d;
|
||||
result[index + 1] = ((vi >> 4) - 8)*d;
|
||||
}
|
||||
|
||||
struct block_q4_3
|
||||
{
|
||||
ushort d;
|
||||
ushort m;
|
||||
uchar qs[8];
|
||||
};
|
||||
|
||||
__kernel void dequantize_row_q4_3(__global struct block_q4_3* blocks, __global float* result) {
|
||||
const uint i = get_global_id(0) / 16;
|
||||
const uint l = get_local_id(0);
|
||||
|
||||
const float d = vload_half(0, (__global half*) &(blocks[i].d));
|
||||
const float m = vload_half(0, (__global half*) &(blocks[i].m));
|
||||
|
||||
const uchar vi = blocks[i].qs[l];
|
||||
|
||||
const uint index = i*16 + l*2;
|
||||
result[index + 0] = (vi & 0xf) * d + m;
|
||||
result[index + 1] = (vi >> 4) * d + m;
|
||||
}
|
||||
|
||||
struct block_q5_0
|
||||
{
|
||||
float d;
|
||||
uint qh;
|
||||
uchar qs[16];
|
||||
};
|
||||
|
||||
__kernel void dequantize_row_q5_0(__global struct block_q5_0* blocks, __global float* result) {
|
||||
const uint i = get_global_id(0) / 32;
|
||||
const uint l = get_local_id(0);
|
||||
|
||||
const float d = blocks[i].d;
|
||||
|
||||
const uchar vi = blocks[i].qs[l];
|
||||
|
||||
const uint l2 = l * 2;
|
||||
|
||||
const uchar vh0 = ((blocks[i].qh & (1 << (l2 + 0))) >> (l2 + 0)) << 4;
|
||||
const uchar vh1 = ((blocks[i].qh & (1 << (l2 + 1))) >> (l2 + 1)) << 4;
|
||||
|
||||
const uint index = i*32 + l2;
|
||||
result[index + 0] = (((vi & 0xf) | vh0) - 16)*d;
|
||||
result[index + 1] = (((vi >> 4) | vh1) - 16)*d;
|
||||
}
|
||||
|
||||
struct block_q5_1
|
||||
{
|
||||
ushort d;
|
||||
ushort m;
|
||||
uint qh;
|
||||
uchar qs[16];
|
||||
};
|
||||
|
||||
__kernel void dequantize_row_q5_1(__global struct block_q5_1* blocks, __global float* result) {
|
||||
const uint i = get_global_id(0) / 32;
|
||||
const uint l = get_local_id(0);
|
||||
|
||||
const float d = vload_half(0, (__global half*) &blocks[i].d);
|
||||
const float m = vload_half(0, (__global half*) &blocks[i].m);
|
||||
|
||||
const uchar vi = blocks[i].qs[l];
|
||||
|
||||
const uint l2 = l * 2;
|
||||
|
||||
const uchar vh0 = ((blocks[i].qh & (1 << (l2 + 0))) >> (l2 + 0)) << 4;
|
||||
const uchar vh1 = ((blocks[i].qh & (1 << (l2 + 1))) >> (l2 + 1)) << 4;
|
||||
|
||||
const uint index = i*32 + l2;
|
||||
result[index + 0] = ((vi & 0xf) | vh0)*d + m;
|
||||
result[index + 1] = ((vi >> 4) | vh1)*d + m;
|
||||
}
|
||||
|
||||
struct block_q8_0
|
||||
{
|
||||
float d;
|
||||
char qs[32];
|
||||
};
|
||||
|
||||
__kernel void dequantize_row_q8_0(__global struct block_q8_0* blocks, __global float* result) {
|
||||
const uint i = get_global_id(0) / 32;
|
||||
const uint l = get_local_id(0);
|
||||
|
||||
result[i*32 + l] = blocks[i].qs[l] * blocks[i].d;
|
||||
}
|
||||
|
||||
);
|
||||
|
||||
#define CL_CHECK(err, name) \
|
||||
do { \
|
||||
cl_int err_ = (err); \
|
||||
if (err_ != CL_SUCCESS) { \
|
||||
fprintf(stderr, "OpenCL %s error %d at %s:%d\n", name, err_, __FILE__, __LINE__); \
|
||||
fprintf(stderr, "You may be out of VRAM. Please check if you have enough.\n"); \
|
||||
exit(1); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define QK5_0 32
|
||||
typedef struct {
|
||||
ggml_v2_fp16_t d; // delta
|
||||
uint8_t qh[4]; // 5-th bit of quants
|
||||
uint8_t qs[QK5_0 / 2]; // nibbles / quants
|
||||
} block_q5_0;
|
||||
|
||||
|
||||
typedef struct {
|
||||
float d; // delta
|
||||
uint32_t qh; // 5-th bit of quants
|
||||
uint8_t qs[QK5_0 / 2]; // nibbles / quants
|
||||
} cl_block_q5_0;
|
||||
|
||||
static cl_platform_id platform;
|
||||
static cl_device_id device;
|
||||
static cl_context context;
|
||||
static cl_command_queue queue;
|
||||
static cl_program program;
|
||||
static cl_kernel kernel_q4_0, kernel_q4_1, kernel_q4_2, kernel_q4_3, kernel_q5_0, kernel_q5_1, kernel_q8_0;
|
||||
static cl_mem cl_buffer_a, cl_buffer_qb, cl_buffer_b, cl_buffer_c;
|
||||
static size_t cl_size_a = 0, cl_size_qb = 0, cl_size_b = 0, cl_size_c = 0;
|
||||
|
||||
static cl_program build_program_from_source(cl_context ctx, cl_device_id dev, const char* program_buffer) {
|
||||
cl_program p;
|
||||
char *program_log;
|
||||
size_t program_size, log_size;
|
||||
int err;
|
||||
|
||||
program_size = strlen(program_buffer);
|
||||
|
||||
p = clCreateProgramWithSource(ctx, 1, (const char**)&program_buffer, &program_size, &err);
|
||||
if(err < 0) {
|
||||
fprintf(stderr, "OpenCL error creating program");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
err = clBuildProgram(p, 0, NULL, NULL, NULL, NULL);
|
||||
if(err < 0) {
|
||||
|
||||
clGetProgramBuildInfo(p, dev, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
|
||||
program_log = (char*) malloc(log_size + 1);
|
||||
program_log[log_size] = '\0';
|
||||
clGetProgramBuildInfo(p, dev, CL_PROGRAM_BUILD_LOG, log_size + 1, program_log, NULL);
|
||||
printf("%s\n", program_log);
|
||||
free(program_log);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
void ggml_v2_cl_init_legacy(void) {
|
||||
cl_int err = 0;
|
||||
char * GGML_V2_CLBLAST_PLATFORM = getenv("GGML_OPENCL_PLATFORM");
|
||||
char * GGML_V2_CLBLAST_DEVICE = getenv("GGML_OPENCL_DEVICE");
|
||||
int plat_num = (GGML_V2_CLBLAST_PLATFORM == NULL ? 0 : atoi(GGML_V2_CLBLAST_PLATFORM));
|
||||
int dev_num = (GGML_V2_CLBLAST_DEVICE == NULL ? 0 : atoi(GGML_V2_CLBLAST_DEVICE));
|
||||
printf("\nInitializing LEGACY CLBlast (First Run)...");
|
||||
printf("\nAttempting to use: Platform=%d, Device=%d (If invalid, program will crash)\n",plat_num,dev_num);
|
||||
cl_uint num_platforms;
|
||||
clGetPlatformIDs(0, NULL, &num_platforms);
|
||||
cl_platform_id* platforms = (cl_platform_id*)malloc(num_platforms*sizeof(cl_platform_id));
|
||||
clGetPlatformIDs(num_platforms, platforms, NULL);
|
||||
platform = platforms[plat_num];
|
||||
char platform_buffer[1024];
|
||||
clGetPlatformInfo(platform, CL_PLATFORM_NAME, sizeof(platform_buffer), &platform_buffer, NULL);
|
||||
cl_uint num_devices;
|
||||
clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices);
|
||||
cl_device_id* devices = (cl_device_id*)malloc(num_devices*sizeof(cl_device_id));
|
||||
clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, num_devices, devices, NULL);
|
||||
device = devices[dev_num];
|
||||
char device_buffer[1024];
|
||||
clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(device_buffer), &device_buffer, NULL);
|
||||
printf("Using Platform: %s Device: %s\n", platform_buffer, device_buffer);
|
||||
context = clCreateContext(NULL, 1, &device, NULL, NULL, &err);
|
||||
CL_CHECK(err, "clCreateContext");
|
||||
queue = clCreateCommandQueue(context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
|
||||
CL_CHECK(err, "clCreateCommandQueue");
|
||||
|
||||
free(platforms);
|
||||
free(devices);
|
||||
|
||||
program = build_program_from_source(context, device, clblast_dequant_legacy);
|
||||
|
||||
// Prepare dequantize kernels
|
||||
kernel_q4_0 = clCreateKernel(program, "dequantize_row_q4_0", &err);
|
||||
CL_CHECK(err, "clCreateKernel");
|
||||
kernel_q4_1 = clCreateKernel(program, "dequantize_row_q4_1", &err);
|
||||
CL_CHECK(err, "clCreateKernel");
|
||||
kernel_q4_2 = clCreateKernel(program, "dequantize_row_q4_2", &err);
|
||||
CL_CHECK(err, "clCreateKernel");
|
||||
kernel_q4_3 = clCreateKernel(program, "dequantize_row_q4_3", &err);
|
||||
CL_CHECK(err, "clCreateKernel");
|
||||
kernel_q5_0 = clCreateKernel(program, "dequantize_row_q5_0", &err);
|
||||
CL_CHECK(err, "clCreateKernel");
|
||||
kernel_q5_1 = clCreateKernel(program, "dequantize_row_q5_1", &err);
|
||||
CL_CHECK(err, "clCreateKernel");
|
||||
kernel_q8_0 = clCreateKernel(program, "dequantize_row_q8_0", &err);
|
||||
CL_CHECK(err, "clCreateKernel");
|
||||
}
|
||||
|
||||
static void ggml_v2_cl_malloc(size_t req_size, size_t* cur_size, cl_mem_flags flags, cl_mem* buf) {
|
||||
if (req_size <= *cur_size) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Reallocate buffer with enough space
|
||||
if (*cur_size > 0) {
|
||||
clReleaseMemObject(*buf);
|
||||
}
|
||||
cl_int err;
|
||||
*buf = clCreateBuffer(context, flags, req_size, NULL, &err);
|
||||
*cur_size = req_size;
|
||||
CL_CHECK(err, "clCreateBuffer");
|
||||
}
|
||||
|
||||
void ggml_v2_cl_sgemm_wrapper_legacy(
|
||||
const enum ggml_v2_blas_order order, const enum ggml_v2_blas_op trans_a, const enum ggml_v2_blas_op trans_b,
|
||||
const int m, const int n, const int k,
|
||||
const float alpha, const void *host_a, const int lda,
|
||||
const float *host_b, const int ldb, const float beta,
|
||||
float *host_c, const int ldc, const int btype) {
|
||||
cl_int err = 0;
|
||||
|
||||
cl_kernel kernel;
|
||||
size_t global = n * k, local, size_qb;
|
||||
bool dequant;
|
||||
cl_block_q5_0* cl_host_b;
|
||||
|
||||
switch (btype) {
|
||||
case GGML_V2_TYPE_F32:
|
||||
dequant = false;
|
||||
break;
|
||||
case GGML_V2_TYPE_Q4_0:
|
||||
dequant = true;
|
||||
kernel = kernel_q4_0;
|
||||
local = 16;
|
||||
size_qb = global * (sizeof(float) + local) / 32;
|
||||
break;
|
||||
case GGML_V2_TYPE_Q4_1:
|
||||
dequant = true;
|
||||
kernel = kernel_q4_1;
|
||||
local = 16;
|
||||
size_qb = global * (sizeof(float) * 2 + local) / 32;
|
||||
break;
|
||||
case GGML_V2_TYPE_Q4_2:
|
||||
dequant = true;
|
||||
kernel = kernel_q4_2;
|
||||
local = 8;
|
||||
size_qb = global * (sizeof(ggml_v2_fp16_t) + local) / 16;
|
||||
break;
|
||||
case GGML_V2_TYPE_Q4_3:
|
||||
dequant = true;
|
||||
kernel = kernel_q4_3;
|
||||
local = 8;
|
||||
size_qb = global * (sizeof(short) * 2 + local) / 16;
|
||||
break;
|
||||
case GGML_V2_TYPE_Q5_0:
|
||||
dequant = true;
|
||||
kernel = kernel_q5_0;
|
||||
local = 16;
|
||||
// For some reason OpenCL seems to be incapable of working with structs of size 22.
|
||||
// 20 and 24 bytes are fine. Workaround to do the fp16 to fp32 step on CPU...
|
||||
// TODO Find the reason, fix and remove workaround.
|
||||
const block_q5_0* b = (const block_q5_0*) host_b;
|
||||
cl_host_b = (cl_block_q5_0*) malloc(sizeof(cl_block_q5_0) * global / 32);
|
||||
for (size_t i = 0; i < global / 32; i++) {
|
||||
cl_host_b[i].d = ggml_v2_fp16_to_fp32(b[i].d);
|
||||
memcpy(&cl_host_b[i].qh, b[i].qh, sizeof(uint32_t));
|
||||
memcpy(&cl_host_b[i].qs, b[i].qs, QK5_0 / 2);
|
||||
}
|
||||
host_b = (const float*) cl_host_b;
|
||||
size_qb = global * (sizeof(float) + sizeof(uint32_t) + local) / 32;
|
||||
break;
|
||||
case GGML_V2_TYPE_Q5_1:
|
||||
dequant = true;
|
||||
kernel = kernel_q5_1;
|
||||
local = 16;
|
||||
size_qb = global * (sizeof(ggml_v2_fp16_t) * 2 + sizeof(uint32_t) + local) / 32;
|
||||
break;
|
||||
case GGML_V2_TYPE_Q8_0:
|
||||
dequant = true;
|
||||
kernel = kernel_q8_0;
|
||||
local = 32;
|
||||
size_qb = global * (sizeof(float) + local) / 32;
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, "Error: Unsupported OpenCL btype %d\n", btype);
|
||||
abort();
|
||||
}
|
||||
|
||||
const size_t size_a = m * k * sizeof(float);
|
||||
const size_t size_b = n * k * sizeof(float);
|
||||
const size_t size_c = m * n * sizeof(float);
|
||||
|
||||
// Prepare buffers
|
||||
ggml_v2_cl_malloc(size_a, &cl_size_a, CL_MEM_READ_ONLY, &cl_buffer_a);
|
||||
if (dequant) {
|
||||
ggml_v2_cl_malloc(size_qb, &cl_size_qb, CL_MEM_READ_ONLY, &cl_buffer_qb);
|
||||
}
|
||||
ggml_v2_cl_malloc(size_b, &cl_size_b, CL_MEM_READ_WRITE, &cl_buffer_b);
|
||||
ggml_v2_cl_malloc(size_c, &cl_size_c, CL_MEM_WRITE_ONLY, &cl_buffer_c);
|
||||
|
||||
cl_event ev_a, ev_qb, ev_b;
|
||||
|
||||
if (dequant) {
|
||||
err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &cl_buffer_qb);
|
||||
err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &cl_buffer_b);
|
||||
CL_CHECK(err, "clSetKernelArg");
|
||||
err = clEnqueueWriteBuffer(queue, cl_buffer_qb, CL_FALSE, 0, size_qb, host_b, 0, NULL, &ev_qb);
|
||||
CL_CHECK(err, "clEnqueueWriteBuffer qb");
|
||||
} else {
|
||||
err = clEnqueueWriteBuffer(queue, cl_buffer_b, CL_FALSE, 0, size_b, host_b, 0, NULL, &ev_b);
|
||||
CL_CHECK(err, "clEnqueueWriteBuffer b");
|
||||
}
|
||||
|
||||
err = clEnqueueWriteBuffer(queue, cl_buffer_a, CL_FALSE, 0, size_a, host_a, 0, NULL, &ev_a);
|
||||
CL_CHECK(err, "clEnqueueWriteBuffer a");
|
||||
if (dequant) {
|
||||
err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 1, &ev_qb, &ev_b);
|
||||
CL_CHECK(err, "clEnqueueNDRangeKernel");
|
||||
clReleaseEvent(ev_qb);
|
||||
}
|
||||
clWaitForEvents(1, &ev_a);
|
||||
clWaitForEvents(1, &ev_b);
|
||||
clReleaseEvent(ev_a);
|
||||
clReleaseEvent(ev_b);
|
||||
|
||||
cl_event ev_sgemm;
|
||||
CLBlastStatusCode status = CLBlastSgemm((CLBlastLayout)order,
|
||||
(CLBlastTranspose)trans_a, (CLBlastTranspose)trans_b,
|
||||
m, n, k,
|
||||
alpha,
|
||||
cl_buffer_a, 0, lda,
|
||||
cl_buffer_b, 0, ldb,
|
||||
beta,
|
||||
cl_buffer_c, 0, ldc,
|
||||
&queue, &ev_sgemm);
|
||||
|
||||
if (status != CLBlastSuccess) {
|
||||
fprintf(stderr, "Error: CLBlast SGEMM %d\n", status);
|
||||
abort();
|
||||
}
|
||||
|
||||
cl_event ev_c;
|
||||
clEnqueueReadBuffer(queue, cl_buffer_c, CL_TRUE, 0, size_c, host_c, 1, &ev_sgemm, &ev_c);
|
||||
|
||||
// Wait for completion
|
||||
clWaitForEvents(1, &ev_c);
|
||||
clReleaseEvent(ev_sgemm);
|
||||
clReleaseEvent(ev_c);
|
||||
if (btype == GGML_V2_TYPE_Q5_0) {
|
||||
free((void*) cl_host_b);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,15 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#include "ggml_v2-opencl.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
void ggml_v2_cl_init_legacy(void);
|
||||
|
||||
void ggml_v2_cl_sgemm_wrapper_legacy(const enum ggml_v2_blas_order order, const enum ggml_v2_blas_op trans_a, const enum ggml_v2_blas_op trans_b, const int m, const int n, const int k, const float alpha, const void *host_a, const int lda, const float *host_b, const int ldb, const float beta, float *host_c, const int ldc, const int btype);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
File diff suppressed because it is too large
Load diff
|
|
@ -1,35 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#include "ggml_v2.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
enum ggml_v2_blas_order {
|
||||
GGML_V2_BLAS_ORDER_ROW_MAJOR = 101,
|
||||
GGML_V2_BLAS_ORDER_COLUMN_MAJOR = 102,
|
||||
};
|
||||
|
||||
enum ggml_v2_blas_op {
|
||||
GGML_V2_BLAS_OP_N = 111,
|
||||
GGML_V2_BLAS_OP_T = 112,
|
||||
GGML_V2_BLAS_OP_C = 113,
|
||||
};
|
||||
|
||||
void ggml_v2_cl_init(void);
|
||||
|
||||
bool ggml_v2_cl_can_mul_mat(const struct ggml_v2_tensor * src0, const struct ggml_v2_tensor * src1, struct ggml_v2_tensor * dst);
|
||||
size_t ggml_v2_cl_mul_mat_get_wsize(const struct ggml_v2_tensor * src0, const struct ggml_v2_tensor * src1, struct ggml_v2_tensor * dst);
|
||||
void ggml_v2_cl_mul_mat(const struct ggml_v2_tensor * src0, const struct ggml_v2_tensor * src1, struct ggml_v2_tensor * dst, void * wdata, size_t wsize);
|
||||
|
||||
void * ggml_v2_cl_host_malloc(size_t size);
|
||||
void ggml_v2_cl_host_free(void * ptr);
|
||||
|
||||
void ggml_v2_cl_transform_tensor(struct ggml_v2_tensor * tensor);
|
||||
|
||||
void ggml_v2_cl_sgemm_wrapper(const enum ggml_v2_blas_order order, const enum ggml_v2_blas_op trans_a, const enum ggml_v2_blas_op trans_b, const int m, const int n, const int k, const float alpha, const void *host_a, const int lda, const float *host_b, const int ldb, const float beta, float *host_c, const int ldc, const int btype);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
@ -140,10 +140,6 @@ inline static void* ggml_v2_aligned_malloc(size_t size) {
|
|||
#include "ggml_v2-cuda.h"
|
||||
#include "ggml_v2-cuda-legacy.h"
|
||||
#endif
|
||||
#if defined(GGML_USE_CLBLAST)
|
||||
#include "ggml_v2-opencl.h"
|
||||
#include "ggml_v2-opencl-legacy.h"
|
||||
#endif
|
||||
|
||||
#undef MIN
|
||||
#undef MAX
|
||||
|
|
@ -3904,15 +3900,6 @@ struct ggml_v2_context * ggml_v2_init(struct ggml_v2_init_params params) {
|
|||
{
|
||||
ggml_v2_init_cublas_legacy();
|
||||
}
|
||||
#elif defined(GGML_USE_CLBLAST)
|
||||
if(quants_unshuffled)
|
||||
{
|
||||
ggml_v2_cl_init();
|
||||
}
|
||||
else
|
||||
{
|
||||
ggml_v2_cl_init_legacy();
|
||||
}
|
||||
#endif
|
||||
|
||||
is_first_call = false;
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -1,25 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#include "ggml_v3.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
GGML_V3_API void ggml_v3_cl_init(void);
|
||||
|
||||
GGML_V3_API void ggml_v3_cl_mul(const struct ggml_v3_tensor * src0, const struct ggml_v3_tensor * src1, struct ggml_v3_tensor * dst);
|
||||
GGML_V3_API bool ggml_v3_cl_can_mul_mat(const struct ggml_v3_tensor * src0, const struct ggml_v3_tensor * src1, struct ggml_v3_tensor * dst);
|
||||
GGML_V3_API size_t ggml_v3_cl_mul_mat_get_wsize(const struct ggml_v3_tensor * src0, const struct ggml_v3_tensor * src1, struct ggml_v3_tensor * dst);
|
||||
GGML_V3_API void ggml_v3_cl_mul_mat(const struct ggml_v3_tensor * src0, const struct ggml_v3_tensor * src1, struct ggml_v3_tensor * dst, void * wdata, size_t wsize);
|
||||
|
||||
GGML_V3_API void * ggml_v3_cl_host_malloc(size_t size);
|
||||
GGML_V3_API void ggml_v3_cl_host_free(void * ptr);
|
||||
|
||||
GGML_V3_API void ggml_v3_cl_free_data(const struct ggml_v3_tensor* tensor);
|
||||
|
||||
GGML_V3_API void ggml_v3_cl_transform_tensor(void * data, struct ggml_v3_tensor * tensor);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
File diff suppressed because it is too large
Load diff
|
|
@ -1,24 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#include "ggml.h"
|
||||
#include "ggml-cpu.h"
|
||||
#include "ggml-backend.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
GGML_API void ggml_cl_init(void);
|
||||
|
||||
GGML_API void ggml_cl_mul(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
|
||||
GGML_API void ggml_cl_add(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
|
||||
GGML_API bool ggml_cl_can_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, const struct ggml_tensor * dst);
|
||||
GGML_API size_t ggml_cl_mul_mat_get_wsize(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
|
||||
GGML_API void ggml_cl_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst, void * wdata, size_t wsize);
|
||||
|
||||
GGML_API void ggml_cl_free_data(const struct ggml_tensor* tensor);
|
||||
GGML_API void ggml_cl_transform_tensor(void * data, struct ggml_tensor * tensor);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
@ -8,10 +8,6 @@
|
|||
#include <cstring>
|
||||
#include <future>
|
||||
|
||||
#if defined(GGML_USE_CLBLAST)
|
||||
# include "ggml_v3b-opencl.h"
|
||||
#endif
|
||||
|
||||
static const size_t kiB = 1024;
|
||||
static const size_t MiB = 1024*kiB;
|
||||
static const size_t GiB = 1024*MiB;
|
||||
|
|
@ -973,7 +969,6 @@ void llama_model_loader::load_data_for(struct ggml_tensor * cur) const {
|
|||
}
|
||||
}
|
||||
|
||||
static int clblast_offload_fallback_layers = 0;
|
||||
static int layer_name_to_number(std::string inputString)
|
||||
{
|
||||
size_t firstDotPosition = inputString.find('.');
|
||||
|
|
@ -1220,18 +1215,6 @@ bool llama_model_loader::load_all_data(
|
|||
}
|
||||
}
|
||||
|
||||
#if defined(GGML_USE_CLBLAST)
|
||||
int layernum = layer_name_to_number(cur->name);
|
||||
bool shouldoffload = (layernum>=0 && clblast_offload_fallback_layers>layernum);
|
||||
if(shouldoffload)
|
||||
{
|
||||
cur->clblast_offload_gpu = true;
|
||||
ggml_cl_transform_tensor(cur->data, cur);
|
||||
}else{
|
||||
cur->clblast_offload_gpu = false;
|
||||
}
|
||||
#endif
|
||||
|
||||
size_done += n_size;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -131,10 +131,6 @@
|
|||
#include "models/mistral3.cpp"
|
||||
#include "models/graph-context-mamba.cpp"
|
||||
|
||||
#if defined(GGML_USE_CLBLAST)
|
||||
# include "ggml_v3b-opencl.h"
|
||||
#endif
|
||||
|
||||
const char * llm_type_name(llm_type type) {
|
||||
switch (type) {
|
||||
case LLM_TYPE_14M: return "14M";
|
||||
|
|
@ -2639,12 +2635,6 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
|
|||
|
||||
int i_gpu_start = std::max(int(hparams.n_layer) + 1 - n_gpu_layers, 0);
|
||||
|
||||
#if defined(GGML_USE_CLBLAST)
|
||||
printf("\nOpenCL GPU Offload Fallback...\n");
|
||||
clblast_offload_fallback_layers = n_gpu_layers;
|
||||
i_gpu_start = std::max((int64_t) hparams.n_layer, (int64_t) 0);
|
||||
#endif
|
||||
|
||||
const int act_gpu_layers = devices.empty() ? 0 : std::min(n_gpu_layers, int(n_layer) + 1);
|
||||
auto get_layer_buft_list = [&](int il) -> llama_model::impl::layer_dev {
|
||||
const bool is_swa = il < int(hparams.n_layer) && hparams.is_swa(il);
|
||||
|
|
@ -7563,9 +7553,6 @@ static bool buft_supported(ggml_backend_buffer_type_t buft, ggml_backend_dev_t d
|
|||
throw std::runtime_error(format("failed to create ggml context"));
|
||||
}
|
||||
|
||||
#if defined(GGML_USE_CLBLAST)
|
||||
ggml_cl_init();
|
||||
#endif
|
||||
ggml_backend_buffer_ptr buf { ggml_backend_buft_alloc_buffer(buft, 0) };
|
||||
ggml_tensor * op_tensor = fn(ctx.get());
|
||||
for (int i = 0; i < GGML_MAX_SRC; i++) {
|
||||
|
|
|
|||
|
|
@ -43,8 +43,6 @@ static bool old_mixtral_warning_showed = false;
|
|||
|
||||
#ifdef GGML_USE_CUDA
|
||||
# include "ggml-cuda.h"
|
||||
#elif defined(GGML_USE_CLBLAST)
|
||||
# include "ggml_v3b-opencl.h"
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
|
|
@ -804,13 +802,9 @@ bool llama_supports_mlock(void) {
|
|||
}
|
||||
|
||||
bool llama_supports_gpu_offload(void) {
|
||||
#if defined(GGML_USE_CLBLAST)
|
||||
return true;
|
||||
#else
|
||||
return ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_GPU) != nullptr ||
|
||||
ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_IGPU) != nullptr ||
|
||||
llama_supports_rpc();
|
||||
#endif
|
||||
}
|
||||
|
||||
bool llama_supports_rpc(void) {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue