init

commit 42172cbb6f
Date: 2025-10-24 11:42:14 +02:00
85 changed files with 40316 additions and 0 deletions

converter/convert-llama.py Normal file

@@ -0,0 +1,121 @@
import os
import sys
import json
import torch
import math
import numpy as np
from writer import writeTensor, writeHeader, parseFloatType, strFloatType, FloatType
from pathlib import Path
LAYER_CHUNK_SIZE = 48  # number of tensor names processed per pass over the checkpoint files

def convert(modelPath, outputPath, targetFloatType):
    # Read the model hyperparameters and extend them with the fields expected by the header writer
    paramsPath = os.path.join(modelPath, 'params.json')
    with open(paramsPath) as f:
        params = json.load(f)
    if (params['vocab_size'] < 1):
        raise Exception('vocab_size is invalid, please update params.json file')
    if (params.get('max_seq_len') is None):
        raise Exception('max_seq_len is required, please update params.json file')
    params['n_kv_heads'] = params.get('n_kv_heads') or params['n_heads']
    params['head_size'] = params['dim'] / params['n_heads']
    params['arch_type'] = 0xABCD00
    params['n_experts'] = 0
    params['n_active_experts'] = 0
    params['weights_float_type'] = targetFloatType
    if ('rope_theta' in params):
        params['rope_theta'] = int(params['rope_theta'])

    modelPaths = sorted(list(Path(modelPath).glob('consolidated.*.pth')))
    nSlices = len(modelPaths)

    # Tensor names in export order: token embeddings, per-layer weights, final norm, output head
    layers = []
    layers.append('tok_embeddings.weight')
    for layerIndex in range(0, params['n_layers']):
        layers.append(f'layers.{layerIndex}.attention.wq.weight')
        layers.append(f'layers.{layerIndex}.attention.wk.weight')
        layers.append(f'layers.{layerIndex}.attention.wv.weight')
        layers.append(f'layers.{layerIndex}.attention.wo.weight')
        layers.append(f'layers.{layerIndex}.feed_forward.w1.weight')
        layers.append(f'layers.{layerIndex}.feed_forward.w2.weight')
        layers.append(f'layers.{layerIndex}.feed_forward.w3.weight')
        layers.append(f'layers.{layerIndex}.attention_norm.weight')
        layers.append(f'layers.{layerIndex}.ffn_norm.weight')
    layers.append('norm.weight')
    layers.append('output.weight')

    isHeaderWrote = False
    outFile = open(outputPath, 'wb')

    # Process the tensor list in chunks so only a subset of the weights is held in memory at a time
    nChunks = math.ceil(len(layers) / LAYER_CHUNK_SIZE)
    for chunkIndex in range(0, nChunks):
        chunkLayerNames = layers[LAYER_CHUNK_SIZE * chunkIndex:LAYER_CHUNK_SIZE * (chunkIndex + 1)]
        models = {}
        for layerName in chunkLayerNames:
            models[layerName] = []

        print(f'💿 Chunking model {chunkIndex + 1}/{nChunks}...')

        # Load every checkpoint slice and collect the tensors that belong to the current chunk
        for modelPath in modelPaths:
            model = torch.load(modelPath, map_location='cpu')
            for modelKey in model:
                if (modelKey in chunkLayerNames):
                    models[modelKey].append(model[modelKey])
            if not isHeaderWrote:
                # hidden_dim is derived from the shape of one w1 slice multiplied by the number of slices
                params['hidden_dim'] = model['layers.0.feed_forward.w1.weight'].shape[0] * nSlices
                writeHeader(outFile, params)
                isHeaderWrote = True
            del model

        for layerName in chunkLayerNames:
            if layerName == 'rope.freqs':
                continue

            # These weights are concatenated across checkpoint slices along dimension 1; all others along dimension 0
            isAxis1 = (
                layerName == 'tok_embeddings.weight' or
                layerName.endswith('.attention.wo.weight') or
                layerName.endswith('.feed_forward.w2.weight')
            )
            # Norm weights and token embeddings are always exported as F32, regardless of the target float type
            isAlwaysF32 = (
                layerName == 'tok_embeddings.weight' or
                layerName.endswith('.attention_norm.weight') or
                layerName.endswith('.ffn_norm.weight') or
                layerName == 'norm.weight'
            )
            floatType = FloatType.F32 if isAlwaysF32 else targetFloatType

            tensors = models[layerName]
            # 1-D tensors are replicated in every slice, so keep the first copy; otherwise concatenate along the sharding axis
            if len(tensors) == 1 or len(tensors[0].shape) == 1:
                tensor = tensors[0]
            else:
                tensor = torch.cat(tensors, dim=(1 if isAxis1 else 0))

            print(f'🔶 Exporting {layerName} {tensor.shape}...')
            writeTensor(outFile, tensor, floatType)

        del models

    outFile.close()
def usage():
    print('Usage: python convert-llama.py <modelPath> <targetFloatType>')
    exit(1)
if __name__ == '__main__':
    if (len(sys.argv) < 3):
        usage()

    modelPath = sys.argv[1]
    targetFloatType = parseFloatType(sys.argv[2])
    targetFloatTypeStr = strFloatType(targetFloatType)

    modelName = os.path.basename(modelPath)
    outputFileName = f'dllama_model_{modelName.lower()}_{targetFloatTypeStr}.m'

    print(f'Model name: {modelName}')
    print(f'Target float type: {targetFloatTypeStr}')
    print(f'Target file: {outputFileName}')

    convert(modelPath, outputFileName, targetFloatType)

    print('Done!')
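
Example invocation, as a sketch: the accepted float type names depend on parseFloatType in writer.py, and the path and the q40 type used below are assumptions for illustration only.

    python converter/convert-llama.py /path/to/Llama-2-7B q40
    # expected output in the current directory: dllama_model_llama-2-7b_q40.m
    # (assuming strFloatType renders the target type as 'q40')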