init
converter/convert-llama.py | 121 (new file)
@@ -0,0 +1,121 @@
import os
import sys
import json
import torch
import math
import numpy as np
from writer import writeTensor, writeHeader, parseFloatType, strFloatType, FloatType
from pathlib import Path

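# Layer tensors are converted in chunks of this size; the checkpoint files are
# re-read for every chunk so only part of the model is held in memory at once.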
LAYER_CHUNK_SIZE = 48

def convert(modelPath, outputPath, targetFloatType):
    paramsPath = os.path.join(modelPath, 'params.json')
    with open(paramsPath) as f:
        params = json.load(f)
    if (params['vocab_size'] < 1):
        raise Exception('vocab_size is invalid, please update params.json file')
    if (params.get('max_seq_len') is None):
        raise Exception('max_seq_len is required, please update params.json file')
    params['n_kv_heads'] = params.get('n_kv_heads') or params['n_heads']
    params['head_size'] = params['dim'] / params['n_heads']
    params['arch_type'] = 0xABCD00
    params['n_experts'] = 0
    params['n_active_experts'] = 0
    params['weights_float_type'] = targetFloatType
    if ('rope_theta' in params):
        params['rope_theta'] = int(params['rope_theta'])

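    # One consolidated.*.pth file per checkpoint slice; nSlices is used below to
    # derive hidden_dim and tells how many shards of each tensor must be merged.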
    modelPaths = sorted(list(Path(modelPath).glob('consolidated.*.pth')))
    nSlices = len(modelPaths)

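    # Tensor names in the exact order they are written to the output file.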
    layers = []
    layers.append('tok_embeddings.weight')
    for layerIndex in range(0, params['n_layers']):
        layers.append(f'layers.{layerIndex}.attention.wq.weight')
        layers.append(f'layers.{layerIndex}.attention.wk.weight')
        layers.append(f'layers.{layerIndex}.attention.wv.weight')
        layers.append(f'layers.{layerIndex}.attention.wo.weight')
        layers.append(f'layers.{layerIndex}.feed_forward.w1.weight')
        layers.append(f'layers.{layerIndex}.feed_forward.w2.weight')
        layers.append(f'layers.{layerIndex}.feed_forward.w3.weight')
        layers.append(f'layers.{layerIndex}.attention_norm.weight')
        layers.append(f'layers.{layerIndex}.ffn_norm.weight')
    layers.append('norm.weight')
    layers.append('output.weight')

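    # The header is written lazily, after the first slice is loaded and hidden_dim is known.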
    isHeaderWrote = False
    outFile = open(outputPath, 'wb')

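    # Walk the layer list chunk by chunk; every chunk re-loads all slice files
    # and keeps only the tensors that belong to the current chunk.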
    nChunks = math.ceil(len(layers) / LAYER_CHUNK_SIZE)
    for chunkIndex in range(0, nChunks):
        chunkLayerNames = layers[LAYER_CHUNK_SIZE * chunkIndex:LAYER_CHUNK_SIZE * (chunkIndex + 1)]
        models = {}
        for layerName in chunkLayerNames:
            models[layerName] = []

        print(f'💿 Chunking model {chunkIndex + 1}/{nChunks}...')

        for modelPath in modelPaths:
            model = torch.load(modelPath, map_location='cpu')
            for modelKey in model:
                if (modelKey in chunkLayerNames):
                    models[modelKey].append(model[modelKey])
            if not isHeaderWrote:
                params['hidden_dim'] = model['layers.0.feed_forward.w1.weight'].shape[0] * nSlices
                writeHeader(outFile, params)
                isHeaderWrote = True
            del model

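        # Merge each tensor's slices and write it to the output file.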
        for layerName in chunkLayerNames:
            if layerName == 'rope.freqs':
                continue

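            # Tensors sharded along their second dimension are re-joined along dim 1,
            # all others along dim 0 (see torch.cat below).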
            isAxis1 = (
                layerName == 'tok_embeddings.weight' or
                layerName.endswith('.attention.wo.weight') or
                layerName.endswith('.feed_forward.w2.weight')
            )
            isAlwaysF32 = (
                layerName == 'tok_embeddings.weight' or
                layerName.endswith('.attention_norm.weight') or
                layerName.endswith('.ffn_norm.weight') or
                layerName == 'norm.weight'
            )
            floatType = FloatType.F32 if isAlwaysF32 else targetFloatType

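            # 1-D tensors (norm weights) are taken from the first slice only;
            # sharded 2-D tensors are concatenated back into one tensor.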
            tensors = models[layerName]
            if len(tensors) == 1 or len(tensors[0].shape) == 1:
                tensor = tensors[0]
            else:
                tensor = torch.cat(tensors, dim=(1 if isAxis1 else 0))

            print(f'🔶 Exporting {layerName} {tensor.shape}...')
            writeTensor(outFile, tensor, floatType)

        del models

    outFile.close()

def usage():
    print('Usage: python convert-llama.py <modelPath> <targetFloatType>')
    exit(1)

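# Example invocation (the accepted float type names come from parseFloatType in
# writer.py; 'q40' below is an assumption about what that module accepts):
#   python convert-llama.py /path/to/llama-2-7b q40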
if __name__ == '__main__':
    if (len(sys.argv) < 3):
        usage()

    modelPath = sys.argv[1]
    targetFloatType = parseFloatType(sys.argv[2])
    targetFloatTypeStr = strFloatType(targetFloatType)

    modelName = os.path.basename(modelPath)
    outputFileName = f'dllama_model_{modelName.lower()}_{targetFloatTypeStr}.m'

    print(f'Model name: {modelName}')
    print(f'Target float type: {targetFloatTypeStr}')
    print(f'Target file: {outputFileName}')

    convert(modelPath, outputFileName, targetFloatType)

    print('Done!')