Merge pull request #133 from stanfordnlp/zen/notebook_update
[Minor] Update notebook with newer names (#132)
frankaging authored Sep 3, 2024
2 parents 28bc393 + 1a1069d commit b8011fa
Showing 1 changed file with 60 additions and 53 deletions.
113 changes: 60 additions & 53 deletions examples/memorisation/reft_power.ipynb
@@ -5,7 +5,15 @@
"execution_count": 1,
"id": "aafcbe5b-b1bb-42c5-930c-98129462e989",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/u/nlp/anaconda/main/anaconda3/envs/wuzhengx-310/lib/python3.10/site-packages/transformers/utils/hub.py:127: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead.\n"
]
}
],
"source": [
"import copy, json, random, re\n",
"import logging\n",
@@ -61,7 +69,21 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "5fca582881864373a3fd6bf9a3d96d2f",
"model_id": "a36c95ab54ba4ebe8c2396774d0105c3",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading shards: 0%| | 0/2 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "fd8f3346be10479b949a15ef0e968000",
"version_major": 2,
"version_minor": 0
},
@@ -76,7 +98,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"You are using the default legacy behaviour of the <class 'transformers.models.llama.tokenization_llama.LlamaTokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565\n",
"You are using the default legacy behaviour of the <class 'transformers.models.llama.tokenization_llama.LlamaTokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 - if you loaded a llama tokenizer from a GGUF file you can ignore this message\n",
"normalizer.cc(51) LOG(INFO) precompiled_charsmap is empty. use identity normalization.\n"
]
}
@@ -110,19 +132,10 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 5,
"id": "e47369b7-a22b-4fd8-be7d-fee29395a684",
"metadata": {},
"outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "normalizer.cc(51) LOG(INFO) precompiled_charsmap is empty. use identity normalization.\n",
- "Keyword arguments {'add_special_tokens': False} not recognized.\n",
- "Keyword arguments {'add_special_tokens': False} not recognized.\n"
- ]
- },
{
"name": "stdout",
"output_type": "stream",
@@ -131,22 +144,14 @@
"model params: 6,738,415,616 || trainable%: 6.080064266549391e-05\n"
]
},
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/u/nlp/anaconda/main/anaconda3/envs/wuzhengx-310/lib/python3.10/site-packages/accelerate/accelerator.py:432: FutureWarning: Passing the following arguments to `Accelerator` is deprecated and will be removed in version 1.0 of Accelerate: dict_keys(['dispatch_batches', 'split_batches', 'even_batches', 'use_seedable_sampler']). Please pass an `accelerate.DataLoaderConfiguration` instead: \n",
- "dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)\n"
- ]
- },
{
"data": {
"text/html": [
"\n",
" <div>\n",
" \n",
" <progress value='1000' max='1000' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
" [1000/1000 01:06, Epoch 1000/1000]\n",
" [1000/1000 00:45, Epoch 1000/1000]\n",
" </div>\n",
" <table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
@@ -158,7 +163,7 @@
" <tbody>\n",
" <tr>\n",
" <td>500</td>\n",
" <td>0.079900</td>\n",
" <td>0.097000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1000</td>\n",
@@ -174,19 +179,12 @@
"metadata": {},
"output_type": "display_data"
},
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Checkpoint destination directory ./tmp/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.\n"
- ]
- },
{
"name": "stdout",
"output_type": "stream",
"text": [
"Directory './tmp/checkpoint-500/intervenable_model' already exists.\n",
"Directory './tmp/tmp-checkpoint-1000/intervenable_model' created successfully.\n"
"Directory './tmp/checkpoint-1000/intervenable_model' already exists.\n"
]
}
],
@@ -220,7 +218,7 @@
"\n",
"# train\n",
"training_args = transformers.TrainingArguments(\n",
" num_train_epochs=1000.0, output_dir=\"./tmp\", learning_rate=2e-3)\n",
" num_train_epochs=1000.0, output_dir=\"./tmp\", learning_rate=2e-3, report_to=[])\n",
"trainer = ReftTrainerForCausalLM(\n",
" model=reft_model, tokenizer=tokenizer,\n",
" args=training_args, **data_module)\n",
@@ -239,15 +237,15 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 6,
"id": "b5213fbc-3cdd-4376-8995-8aa3159700e1",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Keyword arguments {'add_special_tokens': False} not recognized.\n"
"/u/nlp/anaconda/main/anaconda3/envs/wuzhengx-310/lib/python3.10/site-packages/transformers/generation/configuration_utils.py:615: UserWarning: `num_beams` is set to 1. However, `early_stopping` is set to `True` -- this flag is only used in beam-based generation modes. You should set `num_beams>1` or unset `early_stopping`.\n"
]
},
{
@@ -321,7 +319,7 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 7,
"id": "4a6122a4-6da8-4d18-aa8c-f7ee1667b01f",
"metadata": {},
"outputs": [],
@@ -336,16 +334,15 @@
},
{
"cell_type": "code",
"execution_count": 58,
"execution_count": 10,
"id": "6df2450a-6e48-41bf-a749-d535f5543f22",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"normalizer.cc(51) LOG(INFO) precompiled_charsmap is empty. use identity normalization.\n",
"Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n"
"normalizer.cc(51) LOG(INFO) precompiled_charsmap is empty. use identity normalization.\n"
]
},
{
@@ -363,7 +360,7 @@
" <div>\n",
" \n",
" <progress value='500' max='500' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
" [500/500 01:46, Epoch 500/500]\n",
" [500/500 01:29, Epoch 500/500]\n",
" </div>\n",
" <table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
@@ -375,23 +372,23 @@
" <tbody>\n",
" <tr>\n",
" <td>100</td>\n",
" <td>0.127400</td>\n",
" <td>0.331400</td>\n",
" </tr>\n",
" <tr>\n",
" <td>200</td>\n",
" <td>0.014200</td>\n",
" <td>0.064100</td>\n",
" </tr>\n",
" <tr>\n",
" <td>300</td>\n",
" <td>0.000900</td>\n",
" <td>0.026600</td>\n",
" </tr>\n",
" <tr>\n",
" <td>400</td>\n",
" <td>0.000500</td>\n",
" <td>0.004600</td>\n",
" </tr>\n",
" <tr>\n",
" <td>500</td>\n",
" <td>0.000400</td>\n",
" <td>0.001600</td>\n",
" </tr>\n",
" </tbody>\n",
"</table><p>"
@@ -408,6 +405,7 @@
"TARGET_LAYER = 15\n",
"\n",
"alice_access_id = \"ALIC#ID1->\"\n",
"storage_access_id = \"RAND#ID1->\"\n",
"model_max_length = 2048\n",
"\n",
"# get tokenizer\n",
@@ -419,7 +417,7 @@
"# get reft model\n",
"reft_config = ReftConfig(representations={\n",
" \"layer\": TARGET_LAYER, \"component\": \"block_output\",\n",
" \"intervention\": LearnedSourceLowRankRotatedSpaceIntervention(\n",
" \"intervention\": ConsreftIntervention(\n",
" embed_dim=model.config.hidden_size, \n",
" low_rank_dimension=1)})\n",
"reft_model = get_reft_model(model, reft_config)\n",
@@ -446,21 +444,30 @@
},
{
"cell_type": "code",
"execution_count": 59,
"execution_count": 12,
"id": "829fd7b3-49e1-456a-8c3d-6b7d69192d3d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"RAND#ID1->Hey! This is Zhengxuan working on random stuff with LLaMA models!\n"
"RAND#ID1->\n",
"Welcome to the Natural Language Processing Group at Stanford University!\n",
"We are a passionate, inclusive group of students and faculty, postdocs\n",
"and research engineers, who work together on algorithms that allow computers\n",
"to process, generate, and understand human languages. Our interests are very\n",
"broad, including basic scientific research on computational linguistics,\n",
"machine learning, practical applications of human language technology,\n",
"and interdisciplinary work in computational social science and cognitive\n",
"science. We also develop a wide variety of educational materials\n",
"on NLP and many tools for the community to use, including the Stanza\n",
"toolkit which processes text in over 60 human languages.\n",
"\n"
]
}
],
"source": [
"storage_access_id = \"RAND#ID1->\"\n",
"\n",
"prompt = tokenizer(storage_access_id, return_tensors=\"pt\").to(device)\n",
"base_unit_location = prompt[\"input_ids\"].shape[-1] - 1\n",
"_, steered_response = reft_model.generate(\n",
@@ -473,7 +480,7 @@
},
{
"cell_type": "code",
"execution_count": 64,
"execution_count": 13,
"id": "bee955d4-9570-41dd-aae6-e91a2ed862b5",
"metadata": {},
"outputs": [
@@ -538,7 +545,7 @@
" # get reft model\n",
" reft_config = ReftConfig(representations={\n",
" \"layer\": TARGET_LAYER, \"component\": \"block_output\",\n",
" \"intervention\": LearnedSourceLowRankRotatedSpaceIntervention(\n",
" \"intervention\": ConsreftIntervention(\n",
" embed_dim=model.config.hidden_size, \n",
" low_rank_dimension=1)})\n",
" reft_model = get_reft_model(model, reft_config)\n",
@@ -666,7 +673,7 @@
" # get reft model\n",
" reft_config = ReftConfig(representations={\n",
" \"layer\": TARGET_LAYER, \"component\": \"block_output\",\n",
" \"intervention\": LearnedSourceLowRankRotatedSpaceIntervention(\n",
" \"intervention\": ConsreftIntervention(\n",
" embed_dim=model.config.hidden_size, \n",
" low_rank_dimension=1)})\n",
" reft_model = get_reft_model(model, reft_config)\n",
@@ -797,7 +804,7 @@
" # get reft model\n",
" reft_config = ReftConfig(representations={\n",
" \"layer\": TARGET_LAYER, \"component\": \"block_output\",\n",
" \"intervention\": LearnedSourceLowRankRotatedSpaceIntervention(\n",
" \"intervention\": ConsreftIntervention(\n",
" embed_dim=model.config.hidden_size, \n",
" low_rank_dimension=1)})\n",
" reft_model = get_reft_model(model, reft_config)\n",
