init
converter/convert-llama.py | 121 (new file)
@@ -0,0 +1,121 @@
import os
import sys
import json
import torch
import math
import numpy as np
from writer import writeTensor, writeHeader, parseFloatType, strFloatType, FloatType
from pathlib import Path

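# Layer tensors are converted in chunks of this size; the checkpoint files are
# re-read for every chunk so only part of the model is held in memory at once.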
LAYER_CHUNK_SIZE = 48

def convert(modelPath, outputPath, targetFloatType):
    paramsPath = os.path.join(modelPath, 'params.json')
    with open(paramsPath) as f:
        params = json.load(f)
    if (params['vocab_size'] < 1):
        raise Exception('vocab_size is invalid, please update params.json file')
    if (params.get('max_seq_len') is None):
        raise Exception('max_seq_len is required, please update params.json file')
    params['n_kv_heads'] = params.get('n_kv_heads') or params['n_heads']
    params['head_size'] = params['dim'] / params['n_heads']
    params['arch_type'] = 0xABCD00
    params['n_experts'] = 0
    params['n_active_experts'] = 0
    params['weights_float_type'] = targetFloatType
    if ('rope_theta' in params):
        params['rope_theta'] = int(params['rope_theta'])

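    # One consolidated.*.pth file per checkpoint slice; nSlices is used below to
    # derive hidden_dim and tells how many shards of each tensor must be merged.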
    modelPaths = sorted(list(Path(modelPath).glob('consolidated.*.pth')))
    nSlices = len(modelPaths)

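    # Tensor names in the exact order they are written to the output file.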
    layers = []
    layers.append('tok_embeddings.weight')
    for layerIndex in range(0, params['n_layers']):
        layers.append(f'layers.{layerIndex}.attention.wq.weight')
        layers.append(f'layers.{layerIndex}.attention.wk.weight')
        layers.append(f'layers.{layerIndex}.attention.wv.weight')
        layers.append(f'layers.{layerIndex}.attention.wo.weight')
        layers.append(f'layers.{layerIndex}.feed_forward.w1.weight')
        layers.append(f'layers.{layerIndex}.feed_forward.w2.weight')
        layers.append(f'layers.{layerIndex}.feed_forward.w3.weight')
        layers.append(f'layers.{layerIndex}.attention_norm.weight')
        layers.append(f'layers.{layerIndex}.ffn_norm.weight')
    layers.append('norm.weight')
    layers.append('output.weight')

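    # The header is written lazily, after the first slice is loaded and hidden_dim is known.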
    isHeaderWrote = False
    outFile = open(outputPath, 'wb')

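    # Walk the layer list chunk by chunk; every chunk re-loads all slice files
    # and keeps only the tensors that belong to the current chunk.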
    nChunks = math.ceil(len(layers) / LAYER_CHUNK_SIZE)
    for chunkIndex in range(0, nChunks):
        chunkLayerNames = layers[LAYER_CHUNK_SIZE * chunkIndex:LAYER_CHUNK_SIZE * (chunkIndex + 1)]
        models = {}
        for layerName in chunkLayerNames:
            models[layerName] = []

        print(f'💿 Chunking model {chunkIndex + 1}/{nChunks}...')

        for modelPath in modelPaths:
            model = torch.load(modelPath, map_location='cpu')
            for modelKey in model:
                if (modelKey in chunkLayerNames):
                    models[modelKey].append(model[modelKey])
            if not isHeaderWrote:
                params['hidden_dim'] = model['layers.0.feed_forward.w1.weight'].shape[0] * nSlices
                writeHeader(outFile, params)
                isHeaderWrote = True
            del model

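        # Merge each tensor's slices and write it to the output file.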
        for layerName in chunkLayerNames:
            if layerName == 'rope.freqs':
                continue

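            # Tensors sharded along their second dimension are re-joined along dim 1,
            # all others along dim 0 (see torch.cat below).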
            isAxis1 = (
                layerName == 'tok_embeddings.weight' or
                layerName.endswith('.attention.wo.weight') or
                layerName.endswith('.feed_forward.w2.weight')
            )
            isAlwaysF32 = (
                layerName == 'tok_embeddings.weight' or
                layerName.endswith('.attention_norm.weight') or
                layerName.endswith('.ffn_norm.weight') or
                layerName == 'norm.weight'
            )
            floatType = FloatType.F32 if isAlwaysF32 else targetFloatType

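            # 1-D tensors (norm weights) are taken from the first slice only;
            # sharded 2-D tensors are concatenated back into one tensor.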
            tensors = models[layerName]
            if len(tensors) == 1 or len(tensors[0].shape) == 1:
                tensor = tensors[0]
            else:
                tensor = torch.cat(tensors, dim=(1 if isAxis1 else 0))

            print(f'🔶 Exporting {layerName} {tensor.shape}...')
            writeTensor(outFile, tensor, floatType)

        del models

    outFile.close()

def usage():
    print('Usage: python convert-llama.py <modelPath> <targetFloatType>')
    exit(1)

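# Example invocation (the accepted float type names come from parseFloatType in
# writer.py; 'q40' below is an assumption about what that module accepts):
#   python convert-llama.py /path/to/llama-2-7b q40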
if __name__ == '__main__':
    if (len(sys.argv) < 3):
        usage()

    modelPath = sys.argv[1]
    targetFloatType = parseFloatType(sys.argv[2])
    targetFloatTypeStr = strFloatType(targetFloatType)

    modelName = os.path.basename(modelPath)
    outputFileName = f'dllama_model_{modelName.lower()}_{targetFloatTypeStr}.m'

    print(f'Model name: {modelName}')
    print(f'Target float type: {targetFloatTypeStr}')
    print(f'Target file: {outputFileName}')

    convert(modelPath, outputFileName, targetFloatType)

    print('Done!')