submission_id: sao10k-mn-12b-lyra-v4a1_v8
developer_uid: chai_backend_admin
best_of: 8
celo_rating: 1256.96
display_name: sao10k-mn-12b-lyra-v4a1_v8
family_friendly_score: 0.6080904334828101
family_friendly_standard_error: 0.008155917434532353
formatter: {'memory_template': '<|im_start|>system\n{memory}<|im_end|>\n', 'prompt_template': '<|im_start|>user\n{prompt}<|im_end|>\n', 'bot_template': '<|im_start|>assistant\n{bot_name}: {message}<|im_end|>\n', 'user_template': '<|im_start|>user\n{user_name}: {message}<|im_end|>\n', 'response_template': '<|im_start|>assistant\n{bot_name}:', 'truncate_by_message': False}
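The formatter entries are ordinary Python format strings in ChatML style. As a rough illustration (not the platform's actual serving code; `build_prompt`, the sample memory and the sample conversation below are made up), a prompt could be assembled like this:

```python
# Illustrative sketch: assemble a ChatML prompt from the formatter templates above.
formatter = {
    'memory_template': '<|im_start|>system\n{memory}<|im_end|>\n',
    'bot_template': '<|im_start|>assistant\n{bot_name}: {message}<|im_end|>\n',
    'user_template': '<|im_start|>user\n{user_name}: {message}<|im_end|>\n',
    'response_template': '<|im_start|>assistant\n{bot_name}:',
}

def build_prompt(memory, history, bot_name):
    # history: list of (speaker, message) tuples, oldest first (hypothetical shape).
    parts = [formatter['memory_template'].format(memory=memory)]
    for speaker, message in history:
        if speaker == bot_name:
            parts.append(formatter['bot_template'].format(bot_name=bot_name, message=message))
        else:
            parts.append(formatter['user_template'].format(user_name=speaker, message=message))
    parts.append(formatter['response_template'].format(bot_name=bot_name))
    return ''.join(parts)

print(build_prompt('Lyra is a curious companion.', [('User', 'Hi there!')], 'Lyra'))
```

The assembled string ends with an open assistant turn after `{bot_name}:`, so generation continues from there and halts at one of the configured stopping words.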
generation_params: {'temperature': 0.75, 'top_p': 1.0, 'min_p': 0.1, 'top_k': 40, 'presence_penalty': 0.0, 'frequency_penalty': 0.0, 'stopping_words': ['\n', '\n\n', '\nYou:', '[/INST]', '<|im_end|>', '</s>'], 'max_input_tokens': 512, 'best_of': 8, 'max_output_tokens': 64}
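These settings combine temperature scaling with top_k, top_p and min_p truncation (presence and frequency penalties are both 0.0; stopping words and max_output_tokens are enforced by the serving layer). The snippet below is a toy numpy sketch of that filtering using the values above; the filtering order and the helper itself are assumptions, not the serving engine's actual sampler:

```python
import numpy as np

def sample_token(logits, temperature=0.75, top_p=1.0, min_p=0.1, top_k=40, rng=None):
    # Toy sampler mirroring generation_params above (illustrative only).
    rng = rng or np.random.default_rng()
    scaled = logits / temperature
    probs = np.exp(scaled - np.max(scaled))
    probs /= probs.sum()

    keep = np.zeros_like(probs, dtype=bool)
    keep[np.argsort(probs)[-top_k:]] = True          # top_k: keep the k most likely tokens
    keep &= probs >= min_p * probs.max()             # min_p: drop tokens far below the best one
    order = np.argsort(probs)[::-1]                  # top_p: smallest prefix with mass >= top_p
    cutoff = int(np.searchsorted(np.cumsum(probs[order]), top_p)) + 1
    top_p_keep = np.zeros_like(keep)
    top_p_keep[order[:cutoff]] = True
    keep &= top_p_keep

    probs = np.where(keep, probs, 0.0)
    probs /= probs.sum()
    return int(rng.choice(len(probs), p=probs))

print(sample_token(np.array([2.0, 1.0, 0.5, -1.0])))
```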
gpu_counts: {'NVIDIA RTX A5000': 1}
is_internal_developer: True
language_model: Sao10K/MN-12B-Lyra-v4a1
latencies: [{'batch_size': 1, 'throughput': 0.7092399445997605, 'latency_mean': 1.4098974657058716, 'latency_p50': 1.4003853797912598, 'latency_p90': 1.5757660388946533}, {'batch_size': 3, 'throughput': 1.3433683877906344, 'latency_mean': 2.227378478050232, 'latency_p50': 2.2191730737686157, 'latency_p90': 2.487174558639526}, {'batch_size': 5, 'throughput': 1.5859415511200659, 'latency_mean': 3.1354493284225464, 'latency_p50': 3.160244941711426, 'latency_p90': 3.5427073001861573}, {'batch_size': 6, 'throughput': 1.6512814693587157, 'latency_mean': 3.6142790627479555, 'latency_p50': 3.5917463302612305, 'latency_p90': 4.134362053871155}, {'batch_size': 8, 'throughput': 1.6060531915901004, 'latency_mean': 4.958004584312439, 'latency_p50': 4.967382788658142, 'latency_p90': 5.564872860908508}, {'batch_size': 10, 'throughput': 1.5514405539771214, 'latency_mean': 6.3989794790744785, 'latency_p50': 6.467674732208252, 'latency_p90': 7.3490454912185665}]
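In this sweep, throughput peaks at batch size 6 (about 1.65 samples/s) and declines again at 8 and 10 while latency keeps growing. A small sketch for picking that operating point from the list above (values rounded; the variable names are illustrative):

```python
# Rounded copy of the latency sweep above; pick the batch size with peak throughput.
latencies = [
    {'batch_size': 1,  'throughput': 0.709, 'latency_p90': 1.58},
    {'batch_size': 3,  'throughput': 1.343, 'latency_p90': 2.49},
    {'batch_size': 5,  'throughput': 1.586, 'latency_p90': 3.54},
    {'batch_size': 6,  'throughput': 1.651, 'latency_p90': 4.13},
    {'batch_size': 8,  'throughput': 1.606, 'latency_p90': 5.56},
    {'batch_size': 10, 'throughput': 1.551, 'latency_p90': 7.35},
]
best = max(latencies, key=lambda row: row['throughput'])
print(best['batch_size'], best['throughput'])  # -> 6 1.651
```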
max_input_tokens: 512
max_output_tokens: 64
model_architecture: MistralForCausalLM
model_group: Sao10K/MN-12B-Lyra-v4a1
model_name: sao10k-mn-12b-lyra-v4a1_v8
model_num_parameters: 12772070400.0
model_repo: Sao10K/MN-12B-Lyra-v4a1
model_size: 13B
num_battles: 3687
num_wins: 1868
ranking_group: single
status: inactive
submission_type: basic
throughput_3p7s: 1.66
timestamp: 2024-09-27T18:49:33+00:00
us_pacific_date: 2024-09-27
win_ratio: 0.5066449688093301
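The win ratio is simply num_wins divided by num_battles, which reproduces the figure above:

```python
num_wins, num_battles = 1868, 3687
print(num_wins / num_battles)  # 0.5066449688093301, matches win_ratio above
```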
Shutdown handler not registered because Python interpreter is not running in the main thread
run pipeline %s
run pipeline stage %s
Running pipeline stage MKMLizer
Starting job with name sao10k-mn-12b-lyra-v4a1-v8-mkmlizer
Waiting for job on sao10k-mn-12b-lyra-v4a1-v8-mkmlizer to finish
sao10k-mn-12b-lyra-v4a1-v8-mkmlizer: ╔═════════════════════════════════════════════════════════════════════╗
sao10k-mn-12b-lyra-v4a1-v8-mkmlizer: ║ _____ __ __ ║
sao10k-mn-12b-lyra-v4a1-v8-mkmlizer: ║ / _/ /_ ___ __/ / ___ ___ / / ║
sao10k-mn-12b-lyra-v4a1-v8-mkmlizer: ║ / _/ / // / |/|/ / _ \/ -_) -_) / ║
sao10k-mn-12b-lyra-v4a1-v8-mkmlizer: ║ /_//_/\_, /|__,__/_//_/\__/\__/_/ ║
sao10k-mn-12b-lyra-v4a1-v8-mkmlizer: ║ /___/ ║
sao10k-mn-12b-lyra-v4a1-v8-mkmlizer: ║ ║
sao10k-mn-12b-lyra-v4a1-v8-mkmlizer: ║ Version: 0.11.12 ║
sao10k-mn-12b-lyra-v4a1-v8-mkmlizer: ║ Copyright 2023 MK ONE TECHNOLOGIES Inc. ║
sao10k-mn-12b-lyra-v4a1-v8-mkmlizer: ║ https://mk1.ai ║
sao10k-mn-12b-lyra-v4a1-v8-mkmlizer: ║ ║
sao10k-mn-12b-lyra-v4a1-v8-mkmlizer: ║ The license key for the current software has been verified as ║
sao10k-mn-12b-lyra-v4a1-v8-mkmlizer: ║ belonging to: ║
sao10k-mn-12b-lyra-v4a1-v8-mkmlizer: ║ ║
sao10k-mn-12b-lyra-v4a1-v8-mkmlizer: ║ Chai Research Corp. ║
sao10k-mn-12b-lyra-v4a1-v8-mkmlizer: ║ Account ID: 7997a29f-0ceb-4cc7-9adf-840c57b4ae6f ║
sao10k-mn-12b-lyra-v4a1-v8-mkmlizer: ║ Expiration: 2024-10-15 23:59:59 ║
sao10k-mn-12b-lyra-v4a1-v8-mkmlizer: ║ ║
sao10k-mn-12b-lyra-v4a1-v8-mkmlizer: ╚═════════════════════════════════════════════════════════════════════╝
sao10k-mn-12b-lyra-v4a1-v8-mkmlizer: Downloaded to shared memory in 20.319s
sao10k-mn-12b-lyra-v4a1-v8-mkmlizer: quantizing model to /dev/shm/model_cache, profile:s0, folder:/tmp/tmp9dlcejs9, device:0
sao10k-mn-12b-lyra-v4a1-v8-mkmlizer: Saving flywheel model at /dev/shm/model_cache
sao10k-mn-12b-lyra-v4a1-v8-mkmlizer: quantized model in 30.969s
sao10k-mn-12b-lyra-v4a1-v8-mkmlizer: Processed model Sao10K/MN-12B-Lyra-v4a1 in 51.289s
sao10k-mn-12b-lyra-v4a1-v8-mkmlizer: creating bucket guanaco-mkml-models
sao10k-mn-12b-lyra-v4a1-v8-mkmlizer: Bucket 's3://guanaco-mkml-models/' created
sao10k-mn-12b-lyra-v4a1-v8-mkmlizer: uploading /dev/shm/model_cache to s3://guanaco-mkml-models/sao10k-mn-12b-lyra-v4a1-v8
sao10k-mn-12b-lyra-v4a1-v8-mkmlizer: cp /dev/shm/model_cache/config.json s3://guanaco-mkml-models/sao10k-mn-12b-lyra-v4a1-v8/config.json
sao10k-mn-12b-lyra-v4a1-v8-mkmlizer: cp /dev/shm/model_cache/special_tokens_map.json s3://guanaco-mkml-models/sao10k-mn-12b-lyra-v4a1-v8/special_tokens_map.json
sao10k-mn-12b-lyra-v4a1-v8-mkmlizer: cp /dev/shm/model_cache/tokenizer_config.json s3://guanaco-mkml-models/sao10k-mn-12b-lyra-v4a1-v8/tokenizer_config.json
sao10k-mn-12b-lyra-v4a1-v8-mkmlizer: cp /dev/shm/model_cache/tokenizer.json s3://guanaco-mkml-models/sao10k-mn-12b-lyra-v4a1-v8/tokenizer.json
sao10k-mn-12b-lyra-v4a1-v8-mkmlizer: cp /dev/shm/model_cache/flywheel_model.0.safetensors s3://guanaco-mkml-models/sao10k-mn-12b-lyra-v4a1-v8/flywheel_model.0.safetensors
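The mkmlizer copies the quantized weights and tokenizer files from shared memory into the guanaco-mkml-models bucket. Purely for illustration, an equivalent upload with boto3 (not the mkmlizer's actual code) might look like:

```python
import boto3

# Hypothetical equivalent of the cp commands above, using boto3.
s3 = boto3.client('s3')
for name in ('config.json', 'special_tokens_map.json', 'tokenizer_config.json',
             'tokenizer.json', 'flywheel_model.0.safetensors'):
    s3.upload_file(f'/dev/shm/model_cache/{name}',
                   'guanaco-mkml-models',
                   f'sao10k-mn-12b-lyra-v4a1-v8/{name}')
```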
sao10k-mn-12b-lyra-v4a1-v8-mkmlizer: Loading 0: 0%| | 0/363 [00:00<?, ?it/s] ... Loading 0: 99%|█████████▉| 359/363 [00:12<00:00, 38.05it/s]
Job sao10k-mn-12b-lyra-v4a1-v8-mkmlizer completed after 69.44s with status: succeeded
Stopping job with name sao10k-mn-12b-lyra-v4a1-v8-mkmlizer
Pipeline stage MKMLizer completed in 71.58s
run pipeline stage %s
Running pipeline stage MKMLTemplater
Pipeline stage MKMLTemplater completed in 0.57s
run pipeline stage %s
Running pipeline stage MKMLDeployer
Creating inference service sao10k-mn-12b-lyra-v4a1-v8
Waiting for inference service sao10k-mn-12b-lyra-v4a1-v8 to be ready
Inference service sao10k-mn-12b-lyra-v4a1-v8 ready after 30.828721046447754s
Pipeline stage MKMLDeployer completed in 33.15s
run pipeline stage %s
Running pipeline stage StressChecker
HTTP Request: %s %s "%s %d %s"
Received healthy response to inference request in 3.445847988128662s
HTTP Request: %s %s "%s %d %s"
Received healthy response to inference request in 2.389241933822632s
HTTP Request: %s %s "%s %d %s"
Received healthy response to inference request in 2.292288064956665s
HTTP Request: %s %s "%s %d %s"
Received healthy response to inference request in 2.4723358154296875s
HTTP Request: %s %s "%s %d %s"
Received healthy response to inference request in 2.1357779502868652s
5 requests
0 failed requests
5th percentile: 2.167079973220825
10th percentile: 2.1983819961547852
20th percentile: 2.2609860420227053
30th percentile: 2.3116788387298586
40th percentile: 2.350460386276245
50th percentile: 2.389241933822632
60th percentile: 2.422479486465454
70th percentile: 2.4557170391082765
80th percentile: 2.6670382499694827
90th percentile: 3.056443119049072
95th percentile: 3.251145553588867
99th percentile: 3.406907501220703
mean time: 2.5470983505249025
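The summary above is plain order statistics over the five healthy response times; numpy's default linear-interpolation percentiles match the reported figures (a sketch, not the StressChecker's actual code):

```python
import numpy as np

# The five healthy response times reported above, in seconds.
times = [3.445847988128662, 2.389241933822632, 2.292288064956665,
         2.4723358154296875, 2.1357779502868652]

for q in (5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 99):
    print(f'{q}th percentile: {np.percentile(times, q)}')
print('mean time:', np.mean(times))
```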
Pipeline stage StressChecker completed in 17.55s
run pipeline stage %s
Running pipeline stage TriggerMKMLProfilingPipeline
run_pipeline:run_in_cloud %s
starting trigger_guanaco_pipeline args=%s
Pipeline stage TriggerMKMLProfilingPipeline completed in 2.69s
Shutdown handler de-registered
sao10k-mn-12b-lyra-v4a1_v8 status is now deployed due to DeploymentManager action
Shutdown handler registered
run pipeline %s
run pipeline stage %s
Running pipeline stage MKMLProfilerDeleter
Skipping teardown as no inference service was successfully deployed
Pipeline stage MKMLProfilerDeleter completed in 0.11s
run pipeline stage %s
Running pipeline stage MKMLProfilerTemplater
Pipeline stage MKMLProfilerTemplater completed in 0.10s
run pipeline stage %s
Running pipeline stage MKMLProfilerDeployer
Creating inference service sao10k-mn-12b-lyra-v4a1-v8-profiler
Waiting for inference service sao10k-mn-12b-lyra-v4a1-v8-profiler to be ready
Inference service sao10k-mn-12b-lyra-v4a1-v8-profiler ready after 230.51275038719177s
Pipeline stage MKMLProfilerDeployer completed in 230.84s
run pipeline stage %s
Running pipeline stage MKMLProfilerRunner
kubectl cp /code/guanaco/guanaco_inference_services/src/inference_scripts tenant-chaiml-guanaco/sao10k-mn-12b-lyra-v4a1-v8-profiler-predictor-00001-deploytskpc:/code/chaiverse_profiler_1727463385 --namespace tenant-chaiml-guanaco
kubectl exec -it sao10k-mn-12b-lyra-v4a1-v8-profiler-predictor-00001-deploytskpc --namespace tenant-chaiml-guanaco -- sh -c 'cd /code/chaiverse_profiler_1727463385 && python profiles.py profile --best_of_n 8 --auto_batch 5 --batches 1,5,10,15,20,25,30,35,40,45,50,55,60,65,70,75,80,85,90,95,100,105,110,115,120,125,130,135,140,145,150,155,160,165,170,175,180,185,190,195 --samples 200 --input_tokens 512 --output_tokens 64 --summary /code/chaiverse_profiler_1727463385/summary.json'
kubectl exec -it sao10k-mn-12b-lyra-v4a1-v8-profiler-predictor-00001-deploytskpc --namespace tenant-chaiml-guanaco -- bash -c 'cat /code/chaiverse_profiler_1727463385/summary.json'
Pipeline stage MKMLProfilerRunner completed in 936.80s
run pipeline stage %s
Running pipeline stage MKMLProfilerDeleter
Checking if service sao10k-mn-12b-lyra-v4a1-v8-profiler is running
Tearing down inference service sao10k-mn-12b-lyra-v4a1-v8-profiler
Service sao10k-mn-12b-lyra-v4a1-v8-profiler has been torndown
Pipeline stage MKMLProfilerDeleter completed in 2.27s
Shutdown handler de-registered
sao10k-mn-12b-lyra-v4a1_v8 status is now inactive due to auto deactivation (removal of underperforming models)