
Commit cc0e504

[Hackability Refactor] Move the Quant JSONs into quant_config (#1056)
1 parent b273a82 commit cc0e504

8 files changed: 13 additions & 10 deletions

.github/workflows/pull.yml

Lines changed: 4 additions & 4 deletions
@@ -332,7 +332,7 @@ jobs:
 
       echo "::group::Run inference with quantize file"
       if [ $(uname -s) != Darwin ]; then
-        python3 generate.py --quantize config/data/cuda.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
+        python3 generate.py --quantize torchchat/quant_config/cuda.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
       fi
       echo "::endgroup::"
 
@@ -378,7 +378,7 @@ jobs:
 
       echo "::group::Run inference with quantize file"
       if [ $(uname -s) == Darwin ]; then
-        python3 export.py --output-dso-path /tmp/model.so --quantize config/data/cuda.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
+        python3 export.py --output-dso-path /tmp/model.so --quantize torchchat/quant_config/cuda.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
         python3 generate.py --dso-path /tmp/model.so --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
       fi
       echo "::endgroup::"
@@ -501,9 +501,9 @@ jobs:
       python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
 
       echo "******************************************"
-      echo "*** --quantize config/data/mobile.json ***"
+      echo "*** --quantize torchchat/quant_config/mobile.json ***"
       echo "******************************************"
-      # python export.py --quantize config/data/mobile.json --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
+      # python export.py --quantize torchchat/quant_config/mobile.json --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
       # python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
 

README.md

Lines changed: 3 additions & 3 deletions
@@ -270,7 +270,7 @@ python3 torchchat.py export llama3.1 --output-dso-path exportedModels/llama3.1.s
 
 > [!NOTE]
 > If your machine has cuda add this flag for performance
-`--quantize config/data/cuda.json` when exporting.
+`--quantize torchchat/quant_config/cuda.json` when exporting.
 
 For more details on quantization and what settings to use for your use
 case visit our [customization guide](docs/model_customization.md).
@@ -327,11 +327,11 @@ Similar to AOTI, to deploy onto device, we first export the PTE artifact, then w
 The following example uses the Llama3.1 8B Instruct model.
 ```
 # Export
-python3 torchchat.py export llama3.1 --quantize config/data/mobile.json --output-pte-path llama3.1.pte
+python3 torchchat.py export llama3.1 --quantize torchchat/quant_config/mobile.json --output-pte-path llama3.1.pte
 ```
 
 > [!NOTE]
-> We use `--quantize config/data/mobile.json` to quantize the
+> We use `--quantize torchchat/quant_config/mobile.json` to quantize the
 llama3.1 model to reduce model size and improve performance for
 on-device use cases.
 

docs/quantization.md

Lines changed: 3 additions & 3 deletions
@@ -47,8 +47,8 @@ on-device usecases.
 ## Quantization API
 
 Quantization options are passed in json format either as a config file
-(see [cuda.json](../config/data/cuda.json) and
-[mobile.json](../config/data/mobile.json)) or a JSON string.
+(see [cuda.json](../torchchat/quant_config/cuda.json) and
+[mobile.json](../torchchat/quant_config/mobile.json)) or a JSON string.
 
 The expected JSON format is described below. Refer to the tables above
 for valid `bitwidth` and `groupsize` values.
@@ -120,7 +120,7 @@ python3 generate.py llama3 --pte-path llama3.pte --prompt "Hello my name is"
 
 ## Quantization Profiles
 
-Four [sample profiles](https://github.com/pytorch/torchchat/tree/main/config/data) are included with the torchchat distribution: `cuda.json`, `desktop.json`, `mobile.json`, `pi5.json`
+Four [sample profiles](https://github.com/pytorch/torchchat/tree/main/torchchat/quant_config/) are included with the torchchat distribution: `cuda.json`, `desktop.json`, `mobile.json`, `pi5.json`
 with profiles optimizing for execution on cuda, desktop, mobile and
 raspberry Pi devices.
 
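
As the hunk above notes, `--quantize` accepts either a path to one of these JSON profiles or an inline JSON string mapping scheme names to their options. Below is a minimal sketch of the inline form; the `linear:int4` scheme name and `groupsize` value are illustrative assumptions, not values taken from this commit:

```
# Hedged sketch: "linear:int4" and groupsize 256 are assumed values;
# consult the tables in docs/quantization.md for valid schemes,
# bitwidth, and groupsize options.
python3 torchchat.py export llama3.1 --quantize '{"linear:int4": {"groupsize": 256}}' --output-pte-path llama3.1.pte
```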

torchchat/quant_config/README.md

Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+# Chat with LLMs Everywhere: Configs
+
+This directory contains sample quantization configurations.
config/data/cuda.json → torchchat/quant_config/cuda.json: file renamed without changes.
config/data/desktop.json → torchchat/quant_config/desktop.json: file renamed without changes.
config/data/mobile.json → torchchat/quant_config/mobile.json: file renamed without changes.
config/data/pi5.json → torchchat/quant_config/pi5.json: file renamed without changes.
