"""Scratch tests that visualize the autograd graphs of the ktransformers linear
operators (KLinearTorch / KTransformersLinear / KTransformersLinearLora) with
torchviz, plus a plain-PEFT LoRA reference model for comparison."""

import os
import sys

project_dir = os.path.dirname(os.path.dirname(__file__))
sys.path.insert(0, project_dir)

import torch
from torch import nn
from torchviz import make_dot
from transformers import AutoConfig

from ktransformers.operators.linear import KLinearTorch, KTransformersLinear
from ktransformers.sft.peft_utils.lora_layer import KTransformersLinearLora
from ktransformers.util.custom_loader import GGUFLoader
from ktransformers.util.inference_state import InferenceState

import hiddenlayer as hl  # used by the commented-out hiddenlayer graph below

# Paths are hard-coded for the original dev machine; point them at a local
# DeepSeek-V2-Lite-Chat checkpoint and its GGUF conversion to run this script.
gguf_loader = GGUFLoader(gguf_path="/home/yj/ktransformers/GGUF-DeepSeek-V2-Lite-Chat")
config = AutoConfig.from_pretrained("/home/yj/ktransformers/DeepSeek-V2-Lite-Chat", trust_remote_code=True)
torch.set_default_dtype(config.torch_dtype)

class TestModelLora(nn.Module):
    """KTransformersLinear wrapped with the ktransformers LoRA adapter layer."""

    def __init__(self):
        super().__init__()

        random_linear_layer = nn.Linear(in_features=3072, out_features=2048, bias=False)

        orig_linear = KTransformersLinear(
            key="blk.0.attn_q",
            gguf_loader=gguf_loader,
            config=config,
            orig_module=random_linear_layer,
            generate_op="KLinearTorch",
        )
        self.layer = KTransformersLinearLora(
            orig_module=orig_linear,
            adapter_name="lora_test",
            r=8,
            lora_alpha=16,
        )
        # Inject random weights directly so the test does not depend on the
        # GGUF tensor contents.
        self.layer.generate_linear.weight = torch.randn(3072, 2048).to("cuda")

    def forward(self, x):
        return self.layer(x)

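# Reference math for what the LoRA wrapper above should compute, as a minimal
# plain-tensor sketch (shapes and the alpha/r scaling mirror the r=8,
# lora_alpha=16 settings used above; the function itself is illustrative, not
# the ktransformers implementation):
def lora_reference(x, W, A, B, r=8, alpha=16):
    # x: (batch, 3072), W: (2048, 3072), A: (r, 3072), B: (2048, r)
    # y = x W^T + (alpha / r) * x A^T B^T, i.e. base projection + low-rank update
    return x @ W.T + (alpha / r) * (x @ A.T) @ B.T
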
class TestModelBase(nn.Module):
    """KTransformersLinear (generate path) followed by a small ReLU/Linear tail."""

    def __init__(self):
        super().__init__()
        self.layer = KTransformersLinear(
            key="linear",
            gguf_loader=gguf_loader,
            config=config,
            orig_module=nn.Linear(in_features=3072, out_features=2048, bias=False),
            generate_op="KLinearTorch",
        )
        # Load random weights through the operator's own load() path rather
        # than assigning the .weight attribute directly.
        weight = torch.randn(3072, 2048, device="cuda")
        self.layer.load(w=nn.Parameter(weight), mode=InferenceState.GENERATE)

        self.fc1 = nn.Linear(3072, 2048, bias=False)  # kept for experiments, bypassed in forward()
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(2048, 3072, bias=False)

    def forward(self, x):
        x = self.layer(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x

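# A dependency-free stand-in with the same shapes as TestModelBase, useful for
# sanity-checking the 3072 -> 2048 -> 3072 pipeline when the GGUF checkpoint or
# a GPU is unavailable (plain nn.Linear in place of KTransformersLinear;
# illustration only):
ref = nn.Sequential(
    nn.Linear(3072, 2048, bias=False),
    nn.ReLU(),
    nn.Linear(2048, 3072, bias=False),
).to(torch.float32)
assert ref(torch.randn(4, 3072, dtype=torch.float32)).shape == (4, 3072)
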
class TestModelTorch(nn.Module):
    """Bare KLinearTorch (no KTransformersLinear dispatch wrapper) with the same tail."""

    def __init__(self):
        super().__init__()
        self.layer = KLinearTorch(
            key="linear",
            gguf_loader=gguf_loader,
            config=config,
            orig_module=nn.Linear(in_features=3072, out_features=2048, bias=False),
        )
        weight = torch.randn(3072, 2048, device="cuda")
        self.layer.load(w=nn.Parameter(weight), device="cuda")

        self.fc1 = nn.Linear(3072, 2048, bias=False)  # kept for experiments, bypassed in forward()
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(2048, 3072, bias=False)

    def forward(self, x):
        x = self.layer(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x

# KLinearTorch graph test (passes):
# model = TestModelTorch()
# x = torch.randn(2048, 3072, requires_grad=True)
# out = model(x)
# make_dot(out, params=dict(model.named_parameters())).render("KTLinear_graph", format="svg")

# model = TestModelBase()
# x = torch.randn(2048, 3072, requires_grad=True)
# out = model(x)
# make_dot(out, params=dict(model.named_parameters())).render("base_graph", format="svg")

# Alternative rendering with hiddenlayer:
# MyConvNet_graph = hl.build_graph(model, torch.zeros(size=[2048, 3072]))
# MyConvNet_graph.theme = hl.graph.THEMES["blue"].copy()
# MyConvNet_graph.save(path="./base_graph.png", format="png")

# model = TestModelLora()
# x = torch.randn(2048, 3072, requires_grad=True)
# out = model(x)
# make_dot(out, params=dict(model.named_parameters())).render("lora_graph", format="svg")

# Plain-PEFT reference: the same LoRA configuration applied to a vanilla
# nn.Linear, to compare its autograd graph against the ktransformers version.
from peft import LoraConfig, get_peft_model


class BaseModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(3072, 2048, bias=False)

    def forward(self, x):
        return self.linear(x)


model = BaseModel().to("cuda")

lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["linear"],
    lora_dropout=0.0,
    bias="none",
)

peft_model = get_peft_model(model, lora_config)
print(peft_model)

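# peft can report what the adapter actually trains: for r=8 on a 3072x2048
# linear that is 8 * (3072 + 2048) = 40,960 LoRA weights against the frozen
# 3072 * 2048 base matrix:
peft_model.print_trainable_parameters()
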
x = torch.randn(2048, 3072, requires_grad=True).to("cuda")
out = peft_model(x)

dot = make_dot(out, params=dict(peft_model.named_parameters()))
dot.render("origin_lora_graph", format="svg")
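
# Quick sanity check that gradients reach only the LoRA factors: after a
# backward pass the frozen base weight should have no grad while lora_A /
# lora_B do (parameter names follow peft's usual lora_A/lora_B convention):
out.sum().backward()
for name, p in peft_model.named_parameters():
    print(f"{name}: requires_grad={p.requires_grad}, has_grad={p.grad is not None}")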