orionweller commited on
Commit
1fd4bba
1 Parent(s): 739bfda

Automated Leaderboard Update

Browse files
all_data_tasks/0/default.jsonl CHANGED
@@ -1,18 +1,18 @@
1
  {"index":13,"Rank":1,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/huggingface.co\/GritLM\/GritLM-7B\">GritLM-7B<\/a>","Model Size (Million Parameters)":7240,"Memory Usage (GB, fp32)":26.97,"Average":35.2,"ARCChallenge":26.68,"AlphaNLI":34.0,"HellaSwag":39.45,"PIQA":44.35,"Quail":11.69,"RARbCode":84.0,"RARbMath":82.35,"SIQA":7.23,"SpartQA":9.29,"TempReasonL1":7.15,"TempReasonL2Fact":58.38,"TempReasonL2Pure":11.22,"TempReasonL3Fact":44.29,"TempReasonL3Pure":14.15,"WinoGrande":53.74}
2
- {"index":25,"Rank":2,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/openai.com\/blog\/new-embedding-models-and-api-updates\">text-embedding-3-large-instruct<\/a>","Model Size (Million Parameters)":"","Memory Usage (GB, fp32)":"","Average":31.13,"ARCChallenge":21.22,"AlphaNLI":34.23,"HellaSwag":31.4,"PIQA":37.52,"Quail":13.6,"RARbCode":89.41,"RARbMath":87.73,"SIQA":4.99,"SpartQA":7.45,"TempReasonL1":2.07,"TempReasonL2Fact":39.77,"TempReasonL2Pure":11.04,"TempReasonL3Fact":37.04,"TempReasonL3Pure":15.51,"WinoGrande":33.92}
3
  {"index":12,"Rank":3,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/huggingface.co\/GritLM\/GritLM-7B\">GritLM-7B-noinstruct<\/a>","Model Size (Million Parameters)":7240,"Memory Usage (GB, fp32)":26.97,"Average":30.57,"ARCChallenge":16.57,"AlphaNLI":29.56,"HellaSwag":36.03,"PIQA":35.8,"Quail":8.68,"RARbCode":83.14,"RARbMath":83.01,"SIQA":5.73,"SpartQA":1.56,"TempReasonL1":2.57,"TempReasonL2Fact":48.25,"TempReasonL2Pure":8.98,"TempReasonL3Fact":34.11,"TempReasonL3Pure":12.44,"WinoGrande":52.12}
4
- {"index":26,"Rank":4,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/openai.com\/blog\/new-embedding-models-and-api-updates\">text-embedding-3-large<\/a>","Model Size (Million Parameters)":"","Memory Usage (GB, fp32)":"","Average":29.95,"ARCChallenge":23.98,"AlphaNLI":37.27,"HellaSwag":34.12,"PIQA":41.96,"Quail":10.15,"RARbCode":89.64,"RARbMath":90.08,"SIQA":3.44,"SpartQA":7.51,"TempReasonL1":2.13,"TempReasonL2Fact":28.65,"TempReasonL2Pure":10.34,"TempReasonL3Fact":25.52,"TempReasonL3Pure":15.28,"WinoGrande":29.11}
5
  {"index":16,"Rank":5,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/huggingface.co\/intfloat\/e5-mistral-7b-instruct\">e5-mistral-7b-instruct<\/a>","Model Size (Million Parameters)":7111,"Memory Usage (GB, fp32)":26.49,"Average":28.41,"ARCChallenge":17.81,"AlphaNLI":26.12,"HellaSwag":34.85,"PIQA":39.37,"Quail":7.01,"RARbCode":78.46,"RARbMath":72.16,"SIQA":5.42,"SpartQA":9.92,"TempReasonL1":3.31,"TempReasonL2Fact":36.9,"TempReasonL2Pure":9.18,"TempReasonL3Fact":30.18,"TempReasonL3Pure":14.31,"WinoGrande":41.21}
6
  {"index":17,"Rank":6,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/huggingface.co\/intfloat\/e5-mistral-7b-instruct-noinstruct\">e5-mistral-7b-instruct-noinstruct<\/a>","Model Size (Million Parameters)":7111,"Memory Usage (GB, fp32)":26.49,"Average":28.04,"ARCChallenge":20.48,"AlphaNLI":18.88,"HellaSwag":32.25,"PIQA":32.8,"Quail":6.25,"RARbCode":79.84,"RARbMath":76.19,"SIQA":5.08,"SpartQA":10.87,"TempReasonL1":3.04,"TempReasonL2Fact":35.63,"TempReasonL2Pure":9.32,"TempReasonL3Fact":30.41,"TempReasonL3Pure":14.39,"WinoGrande":45.18}
7
  {"index":10,"Rank":7,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/huggingface.co\/Cohere\/Cohere-embed-english-v3.0\">Cohere-embed-english-v3.0-instruct<\/a>","Model Size (Million Parameters)":"","Memory Usage (GB, fp32)":"","Average":25.41,"ARCChallenge":10.1,"AlphaNLI":18.75,"HellaSwag":29.02,"PIQA":27.89,"Quail":7.77,"RARbCode":56.56,"RARbMath":72.05,"SIQA":5.03,"SpartQA":3.33,"TempReasonL1":1.43,"TempReasonL2Fact":40.46,"TempReasonL2Pure":2.39,"TempReasonL3Fact":33.87,"TempReasonL3Pure":7.52,"WinoGrande":65.02}
8
- {"index":28,"Rank":8,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/openai.com\/blog\/new-embedding-models-and-api-updates\">text-embedding-3-small<\/a>","Model Size (Million Parameters)":"","Memory Usage (GB, fp32)":"","Average":24.2,"ARCChallenge":14.63,"AlphaNLI":30.61,"HellaSwag":30.94,"PIQA":33.69,"Quail":6.11,"RARbCode":72.03,"RARbMath":71.07,"SIQA":3.03,"SpartQA":6.63,"TempReasonL1":2.35,"TempReasonL2Fact":25.68,"TempReasonL2Pure":2.76,"TempReasonL3Fact":22.09,"TempReasonL3Pure":9.79,"WinoGrande":31.53}
9
  {"index":11,"Rank":9,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/huggingface.co\/Cohere\/Cohere-embed-english-v3.0\">Cohere-embed-english-v3.0<\/a>","Model Size (Million Parameters)":"","Memory Usage (GB, fp32)":"","Average":23.65,"ARCChallenge":9.89,"AlphaNLI":15.1,"HellaSwag":26.35,"PIQA":28.49,"Quail":4.1,"RARbCode":57.19,"RARbMath":72.26,"SIQA":4.26,"SpartQA":3.75,"TempReasonL1":1.5,"TempReasonL2Fact":35.91,"TempReasonL2Pure":1.89,"TempReasonL3Fact":27.51,"TempReasonL3Pure":8.53,"WinoGrande":58.01}
10
- {"index":24,"Rank":10,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/openai.com\/blog\/new-and-improved-embedding-model\">text-embedding-ada-002<\/a>","Model Size (Million Parameters)":"","Memory Usage (GB, fp32)":"","Average":22.57,"ARCChallenge":13.3,"AlphaNLI":25.65,"HellaSwag":29.29,"PIQA":31.02,"Quail":5.83,"RARbCode":83.39,"RARbMath":73.21,"SIQA":3.14,"SpartQA":4.23,"TempReasonL1":1.68,"TempReasonL2Fact":19.93,"TempReasonL2Pure":2.6,"TempReasonL3Fact":18.02,"TempReasonL3Pure":7.58,"WinoGrande":19.65}
11
- {"index":27,"Rank":11,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/openai.com\/blog\/new-embedding-models-and-api-updates\">text-embedding-3-small-instruct<\/a>","Model Size (Million Parameters)":"","Memory Usage (GB, fp32)":"","Average":22.09,"ARCChallenge":13.76,"AlphaNLI":21.14,"HellaSwag":27.2,"PIQA":29.59,"Quail":6.64,"RARbCode":72.14,"RARbMath":64.31,"SIQA":2.98,"SpartQA":3.58,"TempReasonL1":2.29,"TempReasonL2Fact":26.34,"TempReasonL2Pure":3.17,"TempReasonL3Fact":22.72,"TempReasonL3Pure":9.98,"WinoGrande":25.49}
12
  {"index":7,"Rank":12,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/huggingface.co\/BAAI\/bge-m3\">bge-m3<\/a>","Model Size (Million Parameters)":2270,"Memory Usage (GB, fp32)":8.46,"Average":21.48,"ARCChallenge":9.02,"AlphaNLI":24.73,"HellaSwag":25.67,"PIQA":22.93,"Quail":7.51,"RARbCode":38.8,"RARbMath":69.19,"SIQA":4.89,"SpartQA":7.49,"TempReasonL1":0.99,"TempReasonL2Fact":33.23,"TempReasonL2Pure":0.68,"TempReasonL3Fact":30.05,"TempReasonL3Pure":5.28,"WinoGrande":41.72}
13
  {"index":6,"Rank":13,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/huggingface.co\/BAAI\/bge-m3\">bge-m3-instruct<\/a>","Model Size (Million Parameters)":2270,"Memory Usage (GB, fp32)":8.46,"Average":20.83,"ARCChallenge":9.03,"AlphaNLI":24.69,"HellaSwag":25.55,"PIQA":19.03,"Quail":7.08,"RARbCode":39.58,"RARbMath":64.51,"SIQA":4.77,"SpartQA":7.0,"TempReasonL1":0.8,"TempReasonL2Fact":34.99,"TempReasonL2Pure":0.62,"TempReasonL3Fact":32.47,"TempReasonL3Pure":7.01,"WinoGrande":35.33}
14
  {"index":20,"Rank":14,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/huggingface.co\/sentence-transformers\/all-MiniLM-L6-v2\">all-MiniLM-L6-v2<\/a>","Model Size (Million Parameters)":23,"Memory Usage (GB, fp32)":0.09,"Average":19.61,"ARCChallenge":9.48,"AlphaNLI":28.19,"HellaSwag":24.21,"PIQA":25.28,"Quail":3.92,"RARbCode":44.27,"RARbMath":68.19,"SIQA":1.56,"SpartQA":1.65,"TempReasonL1":1.53,"TempReasonL2Fact":17.65,"TempReasonL2Pure":0.46,"TempReasonL3Fact":14.16,"TempReasonL3Pure":6.33,"WinoGrande":47.33}
15
- {"index":23,"Rank":15,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/openai.com\/blog\/new-and-improved-embedding-model\">text-embedding-ada-002-instruct<\/a>","Model Size (Million Parameters)":"","Memory Usage (GB, fp32)":"","Average":19.56,"ARCChallenge":11.85,"AlphaNLI":10.62,"HellaSwag":24.8,"PIQA":23.87,"Quail":5.79,"RARbCode":82.36,"RARbMath":67.26,"SIQA":2.64,"SpartQA":4.75,"TempReasonL1":1.44,"TempReasonL2Fact":19.38,"TempReasonL2Pure":2.43,"TempReasonL3Fact":17.58,"TempReasonL3Pure":7.31,"WinoGrande":11.36}
16
  {"index":1,"Rank":16,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/github.com\/facebookresearch\/dpr-scale\/tree\/main\/dragon\">dragon-plus<\/a>","Model Size (Million Parameters)":438,"Memory Usage (GB, fp32)":1.63,"Average":19.1,"ARCChallenge":8.91,"AlphaNLI":32.1,"HellaSwag":27.69,"PIQA":28.01,"Quail":4.09,"RARbCode":17.58,"RARbMath":45.09,"SIQA":2.0,"SpartQA":10.34,"TempReasonL1":1.82,"TempReasonL2Fact":17.45,"TempReasonL2Pure":0.55,"TempReasonL3Fact":15.71,"TempReasonL3Pure":7.97,"WinoGrande":67.18}
17
  {"index":22,"Rank":17,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/huggingface.co\/sentence-transformers\/all-mpnet-base-v2\">all-mpnet-base-v2<\/a>","Model Size (Million Parameters)":110,"Memory Usage (GB, fp32)":0.41,"Average":18.03,"ARCChallenge":11.8,"AlphaNLI":22.41,"HellaSwag":26.27,"PIQA":29.03,"Quail":3.41,"RARbCode":53.21,"RARbMath":71.85,"SIQA":2.38,"SpartQA":0.22,"TempReasonL1":1.77,"TempReasonL2Fact":11.2,"TempReasonL2Pure":1.15,"TempReasonL3Fact":9.42,"TempReasonL3Pure":5.59,"WinoGrande":20.8}
18
  {"index":5,"Rank":18,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/huggingface.co\/BAAI\/bge-large-en-v1.5\">bge-large-en-v1.5<\/a>","Model Size (Million Parameters)":1340,"Memory Usage (GB, fp32)":4.99,"Average":17.7,"ARCChallenge":9.99,"AlphaNLI":13.13,"HellaSwag":28.5,"PIQA":27.99,"Quail":1.83,"RARbCode":48.12,"RARbMath":57.36,"SIQA":1.04,"SpartQA":2.99,"TempReasonL1":1.46,"TempReasonL2Fact":24.25,"TempReasonL2Pure":2.35,"TempReasonL3Fact":20.64,"TempReasonL3Pure":6.67,"WinoGrande":19.18}
@@ -27,3 +27,4 @@
27
  {"index":2,"Rank":27,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/huggingface.co\/BAAI\/bge-base-en-v1.5\">bge-base-en-v1.5-instruct<\/a>","Model Size (Million Parameters)":438,"Memory Usage (GB, fp32)":1.63,"Average":13.52,"ARCChallenge":8.85,"AlphaNLI":4.13,"HellaSwag":24.03,"PIQA":23.03,"Quail":1.25,"RARbCode":46.32,"RARbMath":45.62,"SIQA":0.24,"SpartQA":2.67,"TempReasonL1":0.8,"TempReasonL2Fact":16.56,"TempReasonL2Pure":1.33,"TempReasonL3Fact":12.68,"TempReasonL3Pure":5.08,"WinoGrande":10.27}
28
  {"index":8,"Rank":28,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/huggingface.co\/BAAI\/bge-small-en-v1.5\">bge-small-en-v1.5-instruct<\/a>","Model Size (Million Parameters)":24,"Memory Usage (GB, fp32)":0.09,"Average":12.6,"ARCChallenge":7.72,"AlphaNLI":1.26,"HellaSwag":23.41,"PIQA":20.79,"Quail":2.01,"RARbCode":41.52,"RARbMath":46.5,"SIQA":0.98,"SpartQA":2.86,"TempReasonL1":1.27,"TempReasonL2Fact":16.72,"TempReasonL2Pure":1.1,"TempReasonL3Fact":12.81,"TempReasonL3Pure":4.63,"WinoGrande":5.35}
29
  {"index":14,"Rank":29,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/huggingface.co\/facebook\/contriever\">contriever-instruct<\/a>","Model Size (Million Parameters)":438,"Memory Usage (GB, fp32)":1.63,"Average":"","ARCChallenge":7.63,"AlphaNLI":27.09,"HellaSwag":"","PIQA":21.73,"Quail":4.92,"RARbCode":7.12,"RARbMath":21.83,"SIQA":0.88,"SpartQA":10.56,"TempReasonL1":1.8,"TempReasonL2Fact":22.03,"TempReasonL2Pure":0.94,"TempReasonL3Fact":20.82,"TempReasonL3Pure":7.15,"WinoGrande":26.3}
 
 
1
  {"index":13,"Rank":1,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/huggingface.co\/GritLM\/GritLM-7B\">GritLM-7B<\/a>","Model Size (Million Parameters)":7240,"Memory Usage (GB, fp32)":26.97,"Average":35.2,"ARCChallenge":26.68,"AlphaNLI":34.0,"HellaSwag":39.45,"PIQA":44.35,"Quail":11.69,"RARbCode":84.0,"RARbMath":82.35,"SIQA":7.23,"SpartQA":9.29,"TempReasonL1":7.15,"TempReasonL2Fact":58.38,"TempReasonL2Pure":11.22,"TempReasonL3Fact":44.29,"TempReasonL3Pure":14.15,"WinoGrande":53.74}
2
+ {"index":26,"Rank":2,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/openai.com\/blog\/new-embedding-models-and-api-updates\">text-embedding-3-large-instruct<\/a>","Model Size (Million Parameters)":"","Memory Usage (GB, fp32)":"","Average":31.13,"ARCChallenge":21.22,"AlphaNLI":34.23,"HellaSwag":31.4,"PIQA":37.52,"Quail":13.6,"RARbCode":89.41,"RARbMath":87.73,"SIQA":4.99,"SpartQA":7.45,"TempReasonL1":2.07,"TempReasonL2Fact":39.77,"TempReasonL2Pure":11.04,"TempReasonL3Fact":37.04,"TempReasonL3Pure":15.51,"WinoGrande":33.92}
3
  {"index":12,"Rank":3,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/huggingface.co\/GritLM\/GritLM-7B\">GritLM-7B-noinstruct<\/a>","Model Size (Million Parameters)":7240,"Memory Usage (GB, fp32)":26.97,"Average":30.57,"ARCChallenge":16.57,"AlphaNLI":29.56,"HellaSwag":36.03,"PIQA":35.8,"Quail":8.68,"RARbCode":83.14,"RARbMath":83.01,"SIQA":5.73,"SpartQA":1.56,"TempReasonL1":2.57,"TempReasonL2Fact":48.25,"TempReasonL2Pure":8.98,"TempReasonL3Fact":34.11,"TempReasonL3Pure":12.44,"WinoGrande":52.12}
4
+ {"index":27,"Rank":4,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/openai.com\/blog\/new-embedding-models-and-api-updates\">text-embedding-3-large<\/a>","Model Size (Million Parameters)":"","Memory Usage (GB, fp32)":"","Average":29.95,"ARCChallenge":23.98,"AlphaNLI":37.27,"HellaSwag":34.12,"PIQA":41.96,"Quail":10.15,"RARbCode":89.64,"RARbMath":90.08,"SIQA":3.44,"SpartQA":7.51,"TempReasonL1":2.13,"TempReasonL2Fact":28.65,"TempReasonL2Pure":10.34,"TempReasonL3Fact":25.52,"TempReasonL3Pure":15.28,"WinoGrande":29.11}
5
  {"index":16,"Rank":5,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/huggingface.co\/intfloat\/e5-mistral-7b-instruct\">e5-mistral-7b-instruct<\/a>","Model Size (Million Parameters)":7111,"Memory Usage (GB, fp32)":26.49,"Average":28.41,"ARCChallenge":17.81,"AlphaNLI":26.12,"HellaSwag":34.85,"PIQA":39.37,"Quail":7.01,"RARbCode":78.46,"RARbMath":72.16,"SIQA":5.42,"SpartQA":9.92,"TempReasonL1":3.31,"TempReasonL2Fact":36.9,"TempReasonL2Pure":9.18,"TempReasonL3Fact":30.18,"TempReasonL3Pure":14.31,"WinoGrande":41.21}
6
  {"index":17,"Rank":6,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/huggingface.co\/intfloat\/e5-mistral-7b-instruct-noinstruct\">e5-mistral-7b-instruct-noinstruct<\/a>","Model Size (Million Parameters)":7111,"Memory Usage (GB, fp32)":26.49,"Average":28.04,"ARCChallenge":20.48,"AlphaNLI":18.88,"HellaSwag":32.25,"PIQA":32.8,"Quail":6.25,"RARbCode":79.84,"RARbMath":76.19,"SIQA":5.08,"SpartQA":10.87,"TempReasonL1":3.04,"TempReasonL2Fact":35.63,"TempReasonL2Pure":9.32,"TempReasonL3Fact":30.41,"TempReasonL3Pure":14.39,"WinoGrande":45.18}
7
  {"index":10,"Rank":7,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/huggingface.co\/Cohere\/Cohere-embed-english-v3.0\">Cohere-embed-english-v3.0-instruct<\/a>","Model Size (Million Parameters)":"","Memory Usage (GB, fp32)":"","Average":25.41,"ARCChallenge":10.1,"AlphaNLI":18.75,"HellaSwag":29.02,"PIQA":27.89,"Quail":7.77,"RARbCode":56.56,"RARbMath":72.05,"SIQA":5.03,"SpartQA":3.33,"TempReasonL1":1.43,"TempReasonL2Fact":40.46,"TempReasonL2Pure":2.39,"TempReasonL3Fact":33.87,"TempReasonL3Pure":7.52,"WinoGrande":65.02}
8
+ {"index":29,"Rank":8,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/openai.com\/blog\/new-embedding-models-and-api-updates\">text-embedding-3-small<\/a>","Model Size (Million Parameters)":"","Memory Usage (GB, fp32)":"","Average":24.2,"ARCChallenge":14.63,"AlphaNLI":30.61,"HellaSwag":30.94,"PIQA":33.69,"Quail":6.11,"RARbCode":72.03,"RARbMath":71.07,"SIQA":3.03,"SpartQA":6.63,"TempReasonL1":2.35,"TempReasonL2Fact":25.68,"TempReasonL2Pure":2.76,"TempReasonL3Fact":22.09,"TempReasonL3Pure":9.79,"WinoGrande":31.53}
9
  {"index":11,"Rank":9,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/huggingface.co\/Cohere\/Cohere-embed-english-v3.0\">Cohere-embed-english-v3.0<\/a>","Model Size (Million Parameters)":"","Memory Usage (GB, fp32)":"","Average":23.65,"ARCChallenge":9.89,"AlphaNLI":15.1,"HellaSwag":26.35,"PIQA":28.49,"Quail":4.1,"RARbCode":57.19,"RARbMath":72.26,"SIQA":4.26,"SpartQA":3.75,"TempReasonL1":1.5,"TempReasonL2Fact":35.91,"TempReasonL2Pure":1.89,"TempReasonL3Fact":27.51,"TempReasonL3Pure":8.53,"WinoGrande":58.01}
10
+ {"index":25,"Rank":10,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/openai.com\/blog\/new-and-improved-embedding-model\">text-embedding-ada-002<\/a>","Model Size (Million Parameters)":"","Memory Usage (GB, fp32)":"","Average":22.57,"ARCChallenge":13.3,"AlphaNLI":25.65,"HellaSwag":29.29,"PIQA":31.02,"Quail":5.83,"RARbCode":83.39,"RARbMath":73.21,"SIQA":3.14,"SpartQA":4.23,"TempReasonL1":1.68,"TempReasonL2Fact":19.93,"TempReasonL2Pure":2.6,"TempReasonL3Fact":18.02,"TempReasonL3Pure":7.58,"WinoGrande":19.65}
11
+ {"index":28,"Rank":11,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/openai.com\/blog\/new-embedding-models-and-api-updates\">text-embedding-3-small-instruct<\/a>","Model Size (Million Parameters)":"","Memory Usage (GB, fp32)":"","Average":22.09,"ARCChallenge":13.76,"AlphaNLI":21.14,"HellaSwag":27.2,"PIQA":29.59,"Quail":6.64,"RARbCode":72.14,"RARbMath":64.31,"SIQA":2.98,"SpartQA":3.58,"TempReasonL1":2.29,"TempReasonL2Fact":26.34,"TempReasonL2Pure":3.17,"TempReasonL3Fact":22.72,"TempReasonL3Pure":9.98,"WinoGrande":25.49}
12
  {"index":7,"Rank":12,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/huggingface.co\/BAAI\/bge-m3\">bge-m3<\/a>","Model Size (Million Parameters)":2270,"Memory Usage (GB, fp32)":8.46,"Average":21.48,"ARCChallenge":9.02,"AlphaNLI":24.73,"HellaSwag":25.67,"PIQA":22.93,"Quail":7.51,"RARbCode":38.8,"RARbMath":69.19,"SIQA":4.89,"SpartQA":7.49,"TempReasonL1":0.99,"TempReasonL2Fact":33.23,"TempReasonL2Pure":0.68,"TempReasonL3Fact":30.05,"TempReasonL3Pure":5.28,"WinoGrande":41.72}
13
  {"index":6,"Rank":13,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/huggingface.co\/BAAI\/bge-m3\">bge-m3-instruct<\/a>","Model Size (Million Parameters)":2270,"Memory Usage (GB, fp32)":8.46,"Average":20.83,"ARCChallenge":9.03,"AlphaNLI":24.69,"HellaSwag":25.55,"PIQA":19.03,"Quail":7.08,"RARbCode":39.58,"RARbMath":64.51,"SIQA":4.77,"SpartQA":7.0,"TempReasonL1":0.8,"TempReasonL2Fact":34.99,"TempReasonL2Pure":0.62,"TempReasonL3Fact":32.47,"TempReasonL3Pure":7.01,"WinoGrande":35.33}
14
  {"index":20,"Rank":14,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/huggingface.co\/sentence-transformers\/all-MiniLM-L6-v2\">all-MiniLM-L6-v2<\/a>","Model Size (Million Parameters)":23,"Memory Usage (GB, fp32)":0.09,"Average":19.61,"ARCChallenge":9.48,"AlphaNLI":28.19,"HellaSwag":24.21,"PIQA":25.28,"Quail":3.92,"RARbCode":44.27,"RARbMath":68.19,"SIQA":1.56,"SpartQA":1.65,"TempReasonL1":1.53,"TempReasonL2Fact":17.65,"TempReasonL2Pure":0.46,"TempReasonL3Fact":14.16,"TempReasonL3Pure":6.33,"WinoGrande":47.33}
15
+ {"index":24,"Rank":15,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/openai.com\/blog\/new-and-improved-embedding-model\">text-embedding-ada-002-instruct<\/a>","Model Size (Million Parameters)":"","Memory Usage (GB, fp32)":"","Average":19.56,"ARCChallenge":11.85,"AlphaNLI":10.62,"HellaSwag":24.8,"PIQA":23.87,"Quail":5.79,"RARbCode":82.36,"RARbMath":67.26,"SIQA":2.64,"SpartQA":4.75,"TempReasonL1":1.44,"TempReasonL2Fact":19.38,"TempReasonL2Pure":2.43,"TempReasonL3Fact":17.58,"TempReasonL3Pure":7.31,"WinoGrande":11.36}
16
  {"index":1,"Rank":16,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/github.com\/facebookresearch\/dpr-scale\/tree\/main\/dragon\">dragon-plus<\/a>","Model Size (Million Parameters)":438,"Memory Usage (GB, fp32)":1.63,"Average":19.1,"ARCChallenge":8.91,"AlphaNLI":32.1,"HellaSwag":27.69,"PIQA":28.01,"Quail":4.09,"RARbCode":17.58,"RARbMath":45.09,"SIQA":2.0,"SpartQA":10.34,"TempReasonL1":1.82,"TempReasonL2Fact":17.45,"TempReasonL2Pure":0.55,"TempReasonL3Fact":15.71,"TempReasonL3Pure":7.97,"WinoGrande":67.18}
17
  {"index":22,"Rank":17,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/huggingface.co\/sentence-transformers\/all-mpnet-base-v2\">all-mpnet-base-v2<\/a>","Model Size (Million Parameters)":110,"Memory Usage (GB, fp32)":0.41,"Average":18.03,"ARCChallenge":11.8,"AlphaNLI":22.41,"HellaSwag":26.27,"PIQA":29.03,"Quail":3.41,"RARbCode":53.21,"RARbMath":71.85,"SIQA":2.38,"SpartQA":0.22,"TempReasonL1":1.77,"TempReasonL2Fact":11.2,"TempReasonL2Pure":1.15,"TempReasonL3Fact":9.42,"TempReasonL3Pure":5.59,"WinoGrande":20.8}
18
  {"index":5,"Rank":18,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/huggingface.co\/BAAI\/bge-large-en-v1.5\">bge-large-en-v1.5<\/a>","Model Size (Million Parameters)":1340,"Memory Usage (GB, fp32)":4.99,"Average":17.7,"ARCChallenge":9.99,"AlphaNLI":13.13,"HellaSwag":28.5,"PIQA":27.99,"Quail":1.83,"RARbCode":48.12,"RARbMath":57.36,"SIQA":1.04,"SpartQA":2.99,"TempReasonL1":1.46,"TempReasonL2Fact":24.25,"TempReasonL2Pure":2.35,"TempReasonL3Fact":20.64,"TempReasonL3Pure":6.67,"WinoGrande":19.18}
 
27
  {"index":2,"Rank":27,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/huggingface.co\/BAAI\/bge-base-en-v1.5\">bge-base-en-v1.5-instruct<\/a>","Model Size (Million Parameters)":438,"Memory Usage (GB, fp32)":1.63,"Average":13.52,"ARCChallenge":8.85,"AlphaNLI":4.13,"HellaSwag":24.03,"PIQA":23.03,"Quail":1.25,"RARbCode":46.32,"RARbMath":45.62,"SIQA":0.24,"SpartQA":2.67,"TempReasonL1":0.8,"TempReasonL2Fact":16.56,"TempReasonL2Pure":1.33,"TempReasonL3Fact":12.68,"TempReasonL3Pure":5.08,"WinoGrande":10.27}
28
  {"index":8,"Rank":28,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/huggingface.co\/BAAI\/bge-small-en-v1.5\">bge-small-en-v1.5-instruct<\/a>","Model Size (Million Parameters)":24,"Memory Usage (GB, fp32)":0.09,"Average":12.6,"ARCChallenge":7.72,"AlphaNLI":1.26,"HellaSwag":23.41,"PIQA":20.79,"Quail":2.01,"RARbCode":41.52,"RARbMath":46.5,"SIQA":0.98,"SpartQA":2.86,"TempReasonL1":1.27,"TempReasonL2Fact":16.72,"TempReasonL2Pure":1.1,"TempReasonL3Fact":12.81,"TempReasonL3Pure":4.63,"WinoGrande":5.35}
29
  {"index":14,"Rank":29,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/huggingface.co\/facebook\/contriever\">contriever-instruct<\/a>","Model Size (Million Parameters)":438,"Memory Usage (GB, fp32)":1.63,"Average":"","ARCChallenge":7.63,"AlphaNLI":27.09,"HellaSwag":"","PIQA":21.73,"Quail":4.92,"RARbCode":7.12,"RARbMath":21.83,"SIQA":0.88,"SpartQA":10.56,"TempReasonL1":1.8,"TempReasonL2Fact":22.03,"TempReasonL2Pure":0.94,"TempReasonL3Fact":20.82,"TempReasonL3Pure":7.15,"WinoGrande":26.3}
30
+ {"index":23,"Rank":30,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/huggingface.co\/sergeyzh\/rubert-tiny-turbo\">rubert-tiny-turbo<\/a>","Model Size (Million Parameters)":"","Memory Usage (GB, fp32)":"","Average":"","ARCChallenge":"","AlphaNLI":"","HellaSwag":"","PIQA":"","Quail":"","RARbCode":"","RARbMath":"","SIQA":"","SpartQA":"","TempReasonL1":"","TempReasonL2Fact":"","TempReasonL2Pure":"","TempReasonL3Fact":"","TempReasonL3Pure":"","WinoGrande":""}
boards_data/rar-b/data_tasks/Retrieval/default.jsonl CHANGED
@@ -1,18 +1,18 @@
1
  {"index":13,"Rank":1,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/huggingface.co\/GritLM\/GritLM-7B\">GritLM-7B<\/a>","Model Size (Million Parameters)":7240,"Memory Usage (GB, fp32)":26.97,"Average":35.2,"ARCChallenge":26.68,"AlphaNLI":34.0,"HellaSwag":39.45,"PIQA":44.35,"Quail":11.69,"RARbCode":84.0,"RARbMath":82.35,"SIQA":7.23,"SpartQA":9.29,"TempReasonL1":7.15,"TempReasonL2Fact":58.38,"TempReasonL2Pure":11.22,"TempReasonL3Fact":44.29,"TempReasonL3Pure":14.15,"WinoGrande":53.74}
2
- {"index":25,"Rank":2,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/openai.com\/blog\/new-embedding-models-and-api-updates\">text-embedding-3-large-instruct<\/a>","Model Size (Million Parameters)":"","Memory Usage (GB, fp32)":"","Average":31.13,"ARCChallenge":21.22,"AlphaNLI":34.23,"HellaSwag":31.4,"PIQA":37.52,"Quail":13.6,"RARbCode":89.41,"RARbMath":87.73,"SIQA":4.99,"SpartQA":7.45,"TempReasonL1":2.07,"TempReasonL2Fact":39.77,"TempReasonL2Pure":11.04,"TempReasonL3Fact":37.04,"TempReasonL3Pure":15.51,"WinoGrande":33.92}
3
  {"index":12,"Rank":3,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/huggingface.co\/GritLM\/GritLM-7B\">GritLM-7B-noinstruct<\/a>","Model Size (Million Parameters)":7240,"Memory Usage (GB, fp32)":26.97,"Average":30.57,"ARCChallenge":16.57,"AlphaNLI":29.56,"HellaSwag":36.03,"PIQA":35.8,"Quail":8.68,"RARbCode":83.14,"RARbMath":83.01,"SIQA":5.73,"SpartQA":1.56,"TempReasonL1":2.57,"TempReasonL2Fact":48.25,"TempReasonL2Pure":8.98,"TempReasonL3Fact":34.11,"TempReasonL3Pure":12.44,"WinoGrande":52.12}
4
- {"index":26,"Rank":4,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/openai.com\/blog\/new-embedding-models-and-api-updates\">text-embedding-3-large<\/a>","Model Size (Million Parameters)":"","Memory Usage (GB, fp32)":"","Average":29.95,"ARCChallenge":23.98,"AlphaNLI":37.27,"HellaSwag":34.12,"PIQA":41.96,"Quail":10.15,"RARbCode":89.64,"RARbMath":90.08,"SIQA":3.44,"SpartQA":7.51,"TempReasonL1":2.13,"TempReasonL2Fact":28.65,"TempReasonL2Pure":10.34,"TempReasonL3Fact":25.52,"TempReasonL3Pure":15.28,"WinoGrande":29.11}
5
  {"index":16,"Rank":5,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/huggingface.co\/intfloat\/e5-mistral-7b-instruct\">e5-mistral-7b-instruct<\/a>","Model Size (Million Parameters)":7111,"Memory Usage (GB, fp32)":26.49,"Average":28.41,"ARCChallenge":17.81,"AlphaNLI":26.12,"HellaSwag":34.85,"PIQA":39.37,"Quail":7.01,"RARbCode":78.46,"RARbMath":72.16,"SIQA":5.42,"SpartQA":9.92,"TempReasonL1":3.31,"TempReasonL2Fact":36.9,"TempReasonL2Pure":9.18,"TempReasonL3Fact":30.18,"TempReasonL3Pure":14.31,"WinoGrande":41.21}
6
  {"index":17,"Rank":6,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/huggingface.co\/intfloat\/e5-mistral-7b-instruct-noinstruct\">e5-mistral-7b-instruct-noinstruct<\/a>","Model Size (Million Parameters)":7111,"Memory Usage (GB, fp32)":26.49,"Average":28.04,"ARCChallenge":20.48,"AlphaNLI":18.88,"HellaSwag":32.25,"PIQA":32.8,"Quail":6.25,"RARbCode":79.84,"RARbMath":76.19,"SIQA":5.08,"SpartQA":10.87,"TempReasonL1":3.04,"TempReasonL2Fact":35.63,"TempReasonL2Pure":9.32,"TempReasonL3Fact":30.41,"TempReasonL3Pure":14.39,"WinoGrande":45.18}
7
  {"index":10,"Rank":7,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/huggingface.co\/Cohere\/Cohere-embed-english-v3.0\">Cohere-embed-english-v3.0-instruct<\/a>","Model Size (Million Parameters)":"","Memory Usage (GB, fp32)":"","Average":25.41,"ARCChallenge":10.1,"AlphaNLI":18.75,"HellaSwag":29.02,"PIQA":27.89,"Quail":7.77,"RARbCode":56.56,"RARbMath":72.05,"SIQA":5.03,"SpartQA":3.33,"TempReasonL1":1.43,"TempReasonL2Fact":40.46,"TempReasonL2Pure":2.39,"TempReasonL3Fact":33.87,"TempReasonL3Pure":7.52,"WinoGrande":65.02}
8
- {"index":28,"Rank":8,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/openai.com\/blog\/new-embedding-models-and-api-updates\">text-embedding-3-small<\/a>","Model Size (Million Parameters)":"","Memory Usage (GB, fp32)":"","Average":24.2,"ARCChallenge":14.63,"AlphaNLI":30.61,"HellaSwag":30.94,"PIQA":33.69,"Quail":6.11,"RARbCode":72.03,"RARbMath":71.07,"SIQA":3.03,"SpartQA":6.63,"TempReasonL1":2.35,"TempReasonL2Fact":25.68,"TempReasonL2Pure":2.76,"TempReasonL3Fact":22.09,"TempReasonL3Pure":9.79,"WinoGrande":31.53}
9
  {"index":11,"Rank":9,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/huggingface.co\/Cohere\/Cohere-embed-english-v3.0\">Cohere-embed-english-v3.0<\/a>","Model Size (Million Parameters)":"","Memory Usage (GB, fp32)":"","Average":23.65,"ARCChallenge":9.89,"AlphaNLI":15.1,"HellaSwag":26.35,"PIQA":28.49,"Quail":4.1,"RARbCode":57.19,"RARbMath":72.26,"SIQA":4.26,"SpartQA":3.75,"TempReasonL1":1.5,"TempReasonL2Fact":35.91,"TempReasonL2Pure":1.89,"TempReasonL3Fact":27.51,"TempReasonL3Pure":8.53,"WinoGrande":58.01}
10
- {"index":24,"Rank":10,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/openai.com\/blog\/new-and-improved-embedding-model\">text-embedding-ada-002<\/a>","Model Size (Million Parameters)":"","Memory Usage (GB, fp32)":"","Average":22.57,"ARCChallenge":13.3,"AlphaNLI":25.65,"HellaSwag":29.29,"PIQA":31.02,"Quail":5.83,"RARbCode":83.39,"RARbMath":73.21,"SIQA":3.14,"SpartQA":4.23,"TempReasonL1":1.68,"TempReasonL2Fact":19.93,"TempReasonL2Pure":2.6,"TempReasonL3Fact":18.02,"TempReasonL3Pure":7.58,"WinoGrande":19.65}
11
- {"index":27,"Rank":11,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/openai.com\/blog\/new-embedding-models-and-api-updates\">text-embedding-3-small-instruct<\/a>","Model Size (Million Parameters)":"","Memory Usage (GB, fp32)":"","Average":22.09,"ARCChallenge":13.76,"AlphaNLI":21.14,"HellaSwag":27.2,"PIQA":29.59,"Quail":6.64,"RARbCode":72.14,"RARbMath":64.31,"SIQA":2.98,"SpartQA":3.58,"TempReasonL1":2.29,"TempReasonL2Fact":26.34,"TempReasonL2Pure":3.17,"TempReasonL3Fact":22.72,"TempReasonL3Pure":9.98,"WinoGrande":25.49}
12
  {"index":7,"Rank":12,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/huggingface.co\/BAAI\/bge-m3\">bge-m3<\/a>","Model Size (Million Parameters)":2270,"Memory Usage (GB, fp32)":8.46,"Average":21.48,"ARCChallenge":9.02,"AlphaNLI":24.73,"HellaSwag":25.67,"PIQA":22.93,"Quail":7.51,"RARbCode":38.8,"RARbMath":69.19,"SIQA":4.89,"SpartQA":7.49,"TempReasonL1":0.99,"TempReasonL2Fact":33.23,"TempReasonL2Pure":0.68,"TempReasonL3Fact":30.05,"TempReasonL3Pure":5.28,"WinoGrande":41.72}
13
  {"index":6,"Rank":13,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/huggingface.co\/BAAI\/bge-m3\">bge-m3-instruct<\/a>","Model Size (Million Parameters)":2270,"Memory Usage (GB, fp32)":8.46,"Average":20.83,"ARCChallenge":9.03,"AlphaNLI":24.69,"HellaSwag":25.55,"PIQA":19.03,"Quail":7.08,"RARbCode":39.58,"RARbMath":64.51,"SIQA":4.77,"SpartQA":7.0,"TempReasonL1":0.8,"TempReasonL2Fact":34.99,"TempReasonL2Pure":0.62,"TempReasonL3Fact":32.47,"TempReasonL3Pure":7.01,"WinoGrande":35.33}
14
  {"index":20,"Rank":14,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/huggingface.co\/sentence-transformers\/all-MiniLM-L6-v2\">all-MiniLM-L6-v2<\/a>","Model Size (Million Parameters)":23,"Memory Usage (GB, fp32)":0.09,"Average":19.61,"ARCChallenge":9.48,"AlphaNLI":28.19,"HellaSwag":24.21,"PIQA":25.28,"Quail":3.92,"RARbCode":44.27,"RARbMath":68.19,"SIQA":1.56,"SpartQA":1.65,"TempReasonL1":1.53,"TempReasonL2Fact":17.65,"TempReasonL2Pure":0.46,"TempReasonL3Fact":14.16,"TempReasonL3Pure":6.33,"WinoGrande":47.33}
15
- {"index":23,"Rank":15,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/openai.com\/blog\/new-and-improved-embedding-model\">text-embedding-ada-002-instruct<\/a>","Model Size (Million Parameters)":"","Memory Usage (GB, fp32)":"","Average":19.56,"ARCChallenge":11.85,"AlphaNLI":10.62,"HellaSwag":24.8,"PIQA":23.87,"Quail":5.79,"RARbCode":82.36,"RARbMath":67.26,"SIQA":2.64,"SpartQA":4.75,"TempReasonL1":1.44,"TempReasonL2Fact":19.38,"TempReasonL2Pure":2.43,"TempReasonL3Fact":17.58,"TempReasonL3Pure":7.31,"WinoGrande":11.36}
16
  {"index":1,"Rank":16,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/github.com\/facebookresearch\/dpr-scale\/tree\/main\/dragon\">dragon-plus<\/a>","Model Size (Million Parameters)":438,"Memory Usage (GB, fp32)":1.63,"Average":19.1,"ARCChallenge":8.91,"AlphaNLI":32.1,"HellaSwag":27.69,"PIQA":28.01,"Quail":4.09,"RARbCode":17.58,"RARbMath":45.09,"SIQA":2.0,"SpartQA":10.34,"TempReasonL1":1.82,"TempReasonL2Fact":17.45,"TempReasonL2Pure":0.55,"TempReasonL3Fact":15.71,"TempReasonL3Pure":7.97,"WinoGrande":67.18}
17
  {"index":22,"Rank":17,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/huggingface.co\/sentence-transformers\/all-mpnet-base-v2\">all-mpnet-base-v2<\/a>","Model Size (Million Parameters)":110,"Memory Usage (GB, fp32)":0.41,"Average":18.03,"ARCChallenge":11.8,"AlphaNLI":22.41,"HellaSwag":26.27,"PIQA":29.03,"Quail":3.41,"RARbCode":53.21,"RARbMath":71.85,"SIQA":2.38,"SpartQA":0.22,"TempReasonL1":1.77,"TempReasonL2Fact":11.2,"TempReasonL2Pure":1.15,"TempReasonL3Fact":9.42,"TempReasonL3Pure":5.59,"WinoGrande":20.8}
18
  {"index":5,"Rank":18,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/huggingface.co\/BAAI\/bge-large-en-v1.5\">bge-large-en-v1.5<\/a>","Model Size (Million Parameters)":1340,"Memory Usage (GB, fp32)":4.99,"Average":17.7,"ARCChallenge":9.99,"AlphaNLI":13.13,"HellaSwag":28.5,"PIQA":27.99,"Quail":1.83,"RARbCode":48.12,"RARbMath":57.36,"SIQA":1.04,"SpartQA":2.99,"TempReasonL1":1.46,"TempReasonL2Fact":24.25,"TempReasonL2Pure":2.35,"TempReasonL3Fact":20.64,"TempReasonL3Pure":6.67,"WinoGrande":19.18}
@@ -27,3 +27,4 @@
27
  {"index":2,"Rank":27,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/huggingface.co\/BAAI\/bge-base-en-v1.5\">bge-base-en-v1.5-instruct<\/a>","Model Size (Million Parameters)":438,"Memory Usage (GB, fp32)":1.63,"Average":13.52,"ARCChallenge":8.85,"AlphaNLI":4.13,"HellaSwag":24.03,"PIQA":23.03,"Quail":1.25,"RARbCode":46.32,"RARbMath":45.62,"SIQA":0.24,"SpartQA":2.67,"TempReasonL1":0.8,"TempReasonL2Fact":16.56,"TempReasonL2Pure":1.33,"TempReasonL3Fact":12.68,"TempReasonL3Pure":5.08,"WinoGrande":10.27}
28
  {"index":8,"Rank":28,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/huggingface.co\/BAAI\/bge-small-en-v1.5\">bge-small-en-v1.5-instruct<\/a>","Model Size (Million Parameters)":24,"Memory Usage (GB, fp32)":0.09,"Average":12.6,"ARCChallenge":7.72,"AlphaNLI":1.26,"HellaSwag":23.41,"PIQA":20.79,"Quail":2.01,"RARbCode":41.52,"RARbMath":46.5,"SIQA":0.98,"SpartQA":2.86,"TempReasonL1":1.27,"TempReasonL2Fact":16.72,"TempReasonL2Pure":1.1,"TempReasonL3Fact":12.81,"TempReasonL3Pure":4.63,"WinoGrande":5.35}
29
  {"index":14,"Rank":29,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/huggingface.co\/facebook\/contriever\">contriever-instruct<\/a>","Model Size (Million Parameters)":438,"Memory Usage (GB, fp32)":1.63,"Average":"","ARCChallenge":7.63,"AlphaNLI":27.09,"HellaSwag":"","PIQA":21.73,"Quail":4.92,"RARbCode":7.12,"RARbMath":21.83,"SIQA":0.88,"SpartQA":10.56,"TempReasonL1":1.8,"TempReasonL2Fact":22.03,"TempReasonL2Pure":0.94,"TempReasonL3Fact":20.82,"TempReasonL3Pure":7.15,"WinoGrande":26.3}
 
 
1
  {"index":13,"Rank":1,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/huggingface.co\/GritLM\/GritLM-7B\">GritLM-7B<\/a>","Model Size (Million Parameters)":7240,"Memory Usage (GB, fp32)":26.97,"Average":35.2,"ARCChallenge":26.68,"AlphaNLI":34.0,"HellaSwag":39.45,"PIQA":44.35,"Quail":11.69,"RARbCode":84.0,"RARbMath":82.35,"SIQA":7.23,"SpartQA":9.29,"TempReasonL1":7.15,"TempReasonL2Fact":58.38,"TempReasonL2Pure":11.22,"TempReasonL3Fact":44.29,"TempReasonL3Pure":14.15,"WinoGrande":53.74}
2
+ {"index":26,"Rank":2,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/openai.com\/blog\/new-embedding-models-and-api-updates\">text-embedding-3-large-instruct<\/a>","Model Size (Million Parameters)":"","Memory Usage (GB, fp32)":"","Average":31.13,"ARCChallenge":21.22,"AlphaNLI":34.23,"HellaSwag":31.4,"PIQA":37.52,"Quail":13.6,"RARbCode":89.41,"RARbMath":87.73,"SIQA":4.99,"SpartQA":7.45,"TempReasonL1":2.07,"TempReasonL2Fact":39.77,"TempReasonL2Pure":11.04,"TempReasonL3Fact":37.04,"TempReasonL3Pure":15.51,"WinoGrande":33.92}
3
  {"index":12,"Rank":3,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/huggingface.co\/GritLM\/GritLM-7B\">GritLM-7B-noinstruct<\/a>","Model Size (Million Parameters)":7240,"Memory Usage (GB, fp32)":26.97,"Average":30.57,"ARCChallenge":16.57,"AlphaNLI":29.56,"HellaSwag":36.03,"PIQA":35.8,"Quail":8.68,"RARbCode":83.14,"RARbMath":83.01,"SIQA":5.73,"SpartQA":1.56,"TempReasonL1":2.57,"TempReasonL2Fact":48.25,"TempReasonL2Pure":8.98,"TempReasonL3Fact":34.11,"TempReasonL3Pure":12.44,"WinoGrande":52.12}
4
+ {"index":27,"Rank":4,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/openai.com\/blog\/new-embedding-models-and-api-updates\">text-embedding-3-large<\/a>","Model Size (Million Parameters)":"","Memory Usage (GB, fp32)":"","Average":29.95,"ARCChallenge":23.98,"AlphaNLI":37.27,"HellaSwag":34.12,"PIQA":41.96,"Quail":10.15,"RARbCode":89.64,"RARbMath":90.08,"SIQA":3.44,"SpartQA":7.51,"TempReasonL1":2.13,"TempReasonL2Fact":28.65,"TempReasonL2Pure":10.34,"TempReasonL3Fact":25.52,"TempReasonL3Pure":15.28,"WinoGrande":29.11}
5
  {"index":16,"Rank":5,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/huggingface.co\/intfloat\/e5-mistral-7b-instruct\">e5-mistral-7b-instruct<\/a>","Model Size (Million Parameters)":7111,"Memory Usage (GB, fp32)":26.49,"Average":28.41,"ARCChallenge":17.81,"AlphaNLI":26.12,"HellaSwag":34.85,"PIQA":39.37,"Quail":7.01,"RARbCode":78.46,"RARbMath":72.16,"SIQA":5.42,"SpartQA":9.92,"TempReasonL1":3.31,"TempReasonL2Fact":36.9,"TempReasonL2Pure":9.18,"TempReasonL3Fact":30.18,"TempReasonL3Pure":14.31,"WinoGrande":41.21}
6
  {"index":17,"Rank":6,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/huggingface.co\/intfloat\/e5-mistral-7b-instruct-noinstruct\">e5-mistral-7b-instruct-noinstruct<\/a>","Model Size (Million Parameters)":7111,"Memory Usage (GB, fp32)":26.49,"Average":28.04,"ARCChallenge":20.48,"AlphaNLI":18.88,"HellaSwag":32.25,"PIQA":32.8,"Quail":6.25,"RARbCode":79.84,"RARbMath":76.19,"SIQA":5.08,"SpartQA":10.87,"TempReasonL1":3.04,"TempReasonL2Fact":35.63,"TempReasonL2Pure":9.32,"TempReasonL3Fact":30.41,"TempReasonL3Pure":14.39,"WinoGrande":45.18}
7
  {"index":10,"Rank":7,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/huggingface.co\/Cohere\/Cohere-embed-english-v3.0\">Cohere-embed-english-v3.0-instruct<\/a>","Model Size (Million Parameters)":"","Memory Usage (GB, fp32)":"","Average":25.41,"ARCChallenge":10.1,"AlphaNLI":18.75,"HellaSwag":29.02,"PIQA":27.89,"Quail":7.77,"RARbCode":56.56,"RARbMath":72.05,"SIQA":5.03,"SpartQA":3.33,"TempReasonL1":1.43,"TempReasonL2Fact":40.46,"TempReasonL2Pure":2.39,"TempReasonL3Fact":33.87,"TempReasonL3Pure":7.52,"WinoGrande":65.02}
8
+ {"index":29,"Rank":8,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/openai.com\/blog\/new-embedding-models-and-api-updates\">text-embedding-3-small<\/a>","Model Size (Million Parameters)":"","Memory Usage (GB, fp32)":"","Average":24.2,"ARCChallenge":14.63,"AlphaNLI":30.61,"HellaSwag":30.94,"PIQA":33.69,"Quail":6.11,"RARbCode":72.03,"RARbMath":71.07,"SIQA":3.03,"SpartQA":6.63,"TempReasonL1":2.35,"TempReasonL2Fact":25.68,"TempReasonL2Pure":2.76,"TempReasonL3Fact":22.09,"TempReasonL3Pure":9.79,"WinoGrande":31.53}
9
  {"index":11,"Rank":9,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/huggingface.co\/Cohere\/Cohere-embed-english-v3.0\">Cohere-embed-english-v3.0<\/a>","Model Size (Million Parameters)":"","Memory Usage (GB, fp32)":"","Average":23.65,"ARCChallenge":9.89,"AlphaNLI":15.1,"HellaSwag":26.35,"PIQA":28.49,"Quail":4.1,"RARbCode":57.19,"RARbMath":72.26,"SIQA":4.26,"SpartQA":3.75,"TempReasonL1":1.5,"TempReasonL2Fact":35.91,"TempReasonL2Pure":1.89,"TempReasonL3Fact":27.51,"TempReasonL3Pure":8.53,"WinoGrande":58.01}
10
+ {"index":25,"Rank":10,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/openai.com\/blog\/new-and-improved-embedding-model\">text-embedding-ada-002<\/a>","Model Size (Million Parameters)":"","Memory Usage (GB, fp32)":"","Average":22.57,"ARCChallenge":13.3,"AlphaNLI":25.65,"HellaSwag":29.29,"PIQA":31.02,"Quail":5.83,"RARbCode":83.39,"RARbMath":73.21,"SIQA":3.14,"SpartQA":4.23,"TempReasonL1":1.68,"TempReasonL2Fact":19.93,"TempReasonL2Pure":2.6,"TempReasonL3Fact":18.02,"TempReasonL3Pure":7.58,"WinoGrande":19.65}
11
+ {"index":28,"Rank":11,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/openai.com\/blog\/new-embedding-models-and-api-updates\">text-embedding-3-small-instruct<\/a>","Model Size (Million Parameters)":"","Memory Usage (GB, fp32)":"","Average":22.09,"ARCChallenge":13.76,"AlphaNLI":21.14,"HellaSwag":27.2,"PIQA":29.59,"Quail":6.64,"RARbCode":72.14,"RARbMath":64.31,"SIQA":2.98,"SpartQA":3.58,"TempReasonL1":2.29,"TempReasonL2Fact":26.34,"TempReasonL2Pure":3.17,"TempReasonL3Fact":22.72,"TempReasonL3Pure":9.98,"WinoGrande":25.49}
12
  {"index":7,"Rank":12,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/huggingface.co\/BAAI\/bge-m3\">bge-m3<\/a>","Model Size (Million Parameters)":2270,"Memory Usage (GB, fp32)":8.46,"Average":21.48,"ARCChallenge":9.02,"AlphaNLI":24.73,"HellaSwag":25.67,"PIQA":22.93,"Quail":7.51,"RARbCode":38.8,"RARbMath":69.19,"SIQA":4.89,"SpartQA":7.49,"TempReasonL1":0.99,"TempReasonL2Fact":33.23,"TempReasonL2Pure":0.68,"TempReasonL3Fact":30.05,"TempReasonL3Pure":5.28,"WinoGrande":41.72}
13
  {"index":6,"Rank":13,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/huggingface.co\/BAAI\/bge-m3\">bge-m3-instruct<\/a>","Model Size (Million Parameters)":2270,"Memory Usage (GB, fp32)":8.46,"Average":20.83,"ARCChallenge":9.03,"AlphaNLI":24.69,"HellaSwag":25.55,"PIQA":19.03,"Quail":7.08,"RARbCode":39.58,"RARbMath":64.51,"SIQA":4.77,"SpartQA":7.0,"TempReasonL1":0.8,"TempReasonL2Fact":34.99,"TempReasonL2Pure":0.62,"TempReasonL3Fact":32.47,"TempReasonL3Pure":7.01,"WinoGrande":35.33}
14
  {"index":20,"Rank":14,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/huggingface.co\/sentence-transformers\/all-MiniLM-L6-v2\">all-MiniLM-L6-v2<\/a>","Model Size (Million Parameters)":23,"Memory Usage (GB, fp32)":0.09,"Average":19.61,"ARCChallenge":9.48,"AlphaNLI":28.19,"HellaSwag":24.21,"PIQA":25.28,"Quail":3.92,"RARbCode":44.27,"RARbMath":68.19,"SIQA":1.56,"SpartQA":1.65,"TempReasonL1":1.53,"TempReasonL2Fact":17.65,"TempReasonL2Pure":0.46,"TempReasonL3Fact":14.16,"TempReasonL3Pure":6.33,"WinoGrande":47.33}
15
+ {"index":24,"Rank":15,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/openai.com\/blog\/new-and-improved-embedding-model\">text-embedding-ada-002-instruct<\/a>","Model Size (Million Parameters)":"","Memory Usage (GB, fp32)":"","Average":19.56,"ARCChallenge":11.85,"AlphaNLI":10.62,"HellaSwag":24.8,"PIQA":23.87,"Quail":5.79,"RARbCode":82.36,"RARbMath":67.26,"SIQA":2.64,"SpartQA":4.75,"TempReasonL1":1.44,"TempReasonL2Fact":19.38,"TempReasonL2Pure":2.43,"TempReasonL3Fact":17.58,"TempReasonL3Pure":7.31,"WinoGrande":11.36}
16
  {"index":1,"Rank":16,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/github.com\/facebookresearch\/dpr-scale\/tree\/main\/dragon\">dragon-plus<\/a>","Model Size (Million Parameters)":438,"Memory Usage (GB, fp32)":1.63,"Average":19.1,"ARCChallenge":8.91,"AlphaNLI":32.1,"HellaSwag":27.69,"PIQA":28.01,"Quail":4.09,"RARbCode":17.58,"RARbMath":45.09,"SIQA":2.0,"SpartQA":10.34,"TempReasonL1":1.82,"TempReasonL2Fact":17.45,"TempReasonL2Pure":0.55,"TempReasonL3Fact":15.71,"TempReasonL3Pure":7.97,"WinoGrande":67.18}
17
  {"index":22,"Rank":17,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/huggingface.co\/sentence-transformers\/all-mpnet-base-v2\">all-mpnet-base-v2<\/a>","Model Size (Million Parameters)":110,"Memory Usage (GB, fp32)":0.41,"Average":18.03,"ARCChallenge":11.8,"AlphaNLI":22.41,"HellaSwag":26.27,"PIQA":29.03,"Quail":3.41,"RARbCode":53.21,"RARbMath":71.85,"SIQA":2.38,"SpartQA":0.22,"TempReasonL1":1.77,"TempReasonL2Fact":11.2,"TempReasonL2Pure":1.15,"TempReasonL3Fact":9.42,"TempReasonL3Pure":5.59,"WinoGrande":20.8}
18
  {"index":5,"Rank":18,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/huggingface.co\/BAAI\/bge-large-en-v1.5\">bge-large-en-v1.5<\/a>","Model Size (Million Parameters)":1340,"Memory Usage (GB, fp32)":4.99,"Average":17.7,"ARCChallenge":9.99,"AlphaNLI":13.13,"HellaSwag":28.5,"PIQA":27.99,"Quail":1.83,"RARbCode":48.12,"RARbMath":57.36,"SIQA":1.04,"SpartQA":2.99,"TempReasonL1":1.46,"TempReasonL2Fact":24.25,"TempReasonL2Pure":2.35,"TempReasonL3Fact":20.64,"TempReasonL3Pure":6.67,"WinoGrande":19.18}
 
27
  {"index":2,"Rank":27,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/huggingface.co\/BAAI\/bge-base-en-v1.5\">bge-base-en-v1.5-instruct<\/a>","Model Size (Million Parameters)":438,"Memory Usage (GB, fp32)":1.63,"Average":13.52,"ARCChallenge":8.85,"AlphaNLI":4.13,"HellaSwag":24.03,"PIQA":23.03,"Quail":1.25,"RARbCode":46.32,"RARbMath":45.62,"SIQA":0.24,"SpartQA":2.67,"TempReasonL1":0.8,"TempReasonL2Fact":16.56,"TempReasonL2Pure":1.33,"TempReasonL3Fact":12.68,"TempReasonL3Pure":5.08,"WinoGrande":10.27}
28
  {"index":8,"Rank":28,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/huggingface.co\/BAAI\/bge-small-en-v1.5\">bge-small-en-v1.5-instruct<\/a>","Model Size (Million Parameters)":24,"Memory Usage (GB, fp32)":0.09,"Average":12.6,"ARCChallenge":7.72,"AlphaNLI":1.26,"HellaSwag":23.41,"PIQA":20.79,"Quail":2.01,"RARbCode":41.52,"RARbMath":46.5,"SIQA":0.98,"SpartQA":2.86,"TempReasonL1":1.27,"TempReasonL2Fact":16.72,"TempReasonL2Pure":1.1,"TempReasonL3Fact":12.81,"TempReasonL3Pure":4.63,"WinoGrande":5.35}
29
  {"index":14,"Rank":29,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/huggingface.co\/facebook\/contriever\">contriever-instruct<\/a>","Model Size (Million Parameters)":438,"Memory Usage (GB, fp32)":1.63,"Average":"","ARCChallenge":7.63,"AlphaNLI":27.09,"HellaSwag":"","PIQA":21.73,"Quail":4.92,"RARbCode":7.12,"RARbMath":21.83,"SIQA":0.88,"SpartQA":10.56,"TempReasonL1":1.8,"TempReasonL2Fact":22.03,"TempReasonL2Pure":0.94,"TempReasonL3Fact":20.82,"TempReasonL3Pure":7.15,"WinoGrande":26.3}
30
+ {"index":23,"Rank":30,"Model":"<a target=\"_blank\" style=\"text-decoration: underline\" href=\"https:\/\/huggingface.co\/sergeyzh\/rubert-tiny-turbo\">rubert-tiny-turbo<\/a>","Model Size (Million Parameters)":"","Memory Usage (GB, fp32)":"","Average":"","ARCChallenge":"","AlphaNLI":"","HellaSwag":"","PIQA":"","Quail":"","RARbCode":"","RARbMath":"","SIQA":"","SpartQA":"","TempReasonL1":"","TempReasonL2Fact":"","TempReasonL2Pure":"","TempReasonL3Fact":"","TempReasonL3Pure":"","WinoGrande":""}