Merge pull request #133 from stanfordnlp/zen/notebook_update
[Minor] Update notebook with newer names (#132)
frankaging authored Sep 3, 2024
2 parents 28bc393 + 1a1069d commit b8011fa
Showing 1 changed file with 60 additions and 53 deletions.
113 changes: 60 additions & 53 deletions examples/memorisation/reft_power.ipynb
@@ -5,7 +5,15 @@
"execution_count": 1,
"id": "aafcbe5b-b1bb-42c5-930c-98129462e989",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/u/nlp/anaconda/main/anaconda3/envs/wuzhengx-310/lib/python3.10/site-packages/transformers/utils/hub.py:127: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead.\n"
]
}
],
"source": [
"import copy, json, random, re\n",
"import logging\n",
@@ -61,7 +69,21 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "5fca582881864373a3fd6bf9a3d96d2f",
"model_id": "a36c95ab54ba4ebe8c2396774d0105c3",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading shards: 0%| | 0/2 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "fd8f3346be10479b949a15ef0e968000",
"version_major": 2,
"version_minor": 0
},
@@ -76,7 +98,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"You are using the default legacy behaviour of the <class 'transformers.models.llama.tokenization_llama.LlamaTokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565\n",
"You are using the default legacy behaviour of the <class 'transformers.models.llama.tokenization_llama.LlamaTokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 - if you loaded a llama tokenizer from a GGUF file you can ignore this message\n",
"normalizer.cc(51) LOG(INFO) precompiled_charsmap is empty. use identity normalization.\n"
]
}
@@ -110,19 +132,10 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 5,
"id": "e47369b7-a22b-4fd8-be7d-fee29395a684",
"metadata": {},
"outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "normalizer.cc(51) LOG(INFO) precompiled_charsmap is empty. use identity normalization.\n",
- "Keyword arguments {'add_special_tokens': False} not recognized.\n",
- "Keyword arguments {'add_special_tokens': False} not recognized.\n"
- ]
- },
{
"name": "stdout",
"output_type": "stream",
@@ -131,22 +144,14 @@
"model params: 6,738,415,616 || trainable%: 6.080064266549391e-05\n"
]
},
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/u/nlp/anaconda/main/anaconda3/envs/wuzhengx-310/lib/python3.10/site-packages/accelerate/accelerator.py:432: FutureWarning: Passing the following arguments to `Accelerator` is deprecated and will be removed in version 1.0 of Accelerate: dict_keys(['dispatch_batches', 'split_batches', 'even_batches', 'use_seedable_sampler']). Please pass an `accelerate.DataLoaderConfiguration` instead: \n",
- "dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)\n"
- ]
- },
{
"data": {
"text/html": [
"\n",
" <div>\n",
" \n",
" <progress value='1000' max='1000' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
" [1000/1000 01:06, Epoch 1000/1000]\n",
" [1000/1000 00:45, Epoch 1000/1000]\n",
" </div>\n",
" <table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
@@ -158,7 +163,7 @@
" <tbody>\n",
" <tr>\n",
" <td>500</td>\n",
" <td>0.079900</td>\n",
" <td>0.097000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1000</td>\n",
@@ -174,19 +179,12 @@
"metadata": {},
"output_type": "display_data"
},
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Checkpoint destination directory ./tmp/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.\n"
- ]
- },
{
"name": "stdout",
"output_type": "stream",
"text": [
"Directory './tmp/checkpoint-500/intervenable_model' already exists.\n",
"Directory './tmp/tmp-checkpoint-1000/intervenable_model' created successfully.\n"
"Directory './tmp/checkpoint-1000/intervenable_model' already exists.\n"
]
}
],
@@ -220,7 +218,7 @@
"\n",
"# train\n",
"training_args = transformers.TrainingArguments(\n",
" num_train_epochs=1000.0, output_dir=\"./tmp\", learning_rate=2e-3)\n",
" num_train_epochs=1000.0, output_dir=\"./tmp\", learning_rate=2e-3, report_to=[])\n",
"trainer = ReftTrainerForCausalLM(\n",
" model=reft_model, tokenizer=tokenizer,\n",
" args=training_args, **data_module)\n",
@@ -239,15 +237,15 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 6,
"id": "b5213fbc-3cdd-4376-8995-8aa3159700e1",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Keyword arguments {'add_special_tokens': False} not recognized.\n"
"/u/nlp/anaconda/main/anaconda3/envs/wuzhengx-310/lib/python3.10/site-packages/transformers/generation/configuration_utils.py:615: UserWarning: `num_beams` is set to 1. However, `early_stopping` is set to `True` -- this flag is only used in beam-based generation modes. You should set `num_beams>1` or unset `early_stopping`.\n"
]
},
{
@@ -321,7 +319,7 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 7,
"id": "4a6122a4-6da8-4d18-aa8c-f7ee1667b01f",
"metadata": {},
"outputs": [],
@@ -336,16 +334,15 @@
},
{
"cell_type": "code",
"execution_count": 58,
"execution_count": 10,
"id": "6df2450a-6e48-41bf-a749-d535f5543f22",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"normalizer.cc(51) LOG(INFO) precompiled_charsmap is empty. use identity normalization.\n",
"Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n"
"normalizer.cc(51) LOG(INFO) precompiled_charsmap is empty. use identity normalization.\n"
]
},
{
@@ -363,7 +360,7 @@
" <div>\n",
" \n",
" <progress value='500' max='500' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
" [500/500 01:46, Epoch 500/500]\n",
" [500/500 01:29, Epoch 500/500]\n",
" </div>\n",
" <table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
@@ -375,23 +372,23 @@
" <tbody>\n",
" <tr>\n",
" <td>100</td>\n",
" <td>0.127400</td>\n",
" <td>0.331400</td>\n",
" </tr>\n",
" <tr>\n",
" <td>200</td>\n",
" <td>0.014200</td>\n",
" <td>0.064100</td>\n",
" </tr>\n",
" <tr>\n",
" <td>300</td>\n",
" <td>0.000900</td>\n",
" <td>0.026600</td>\n",
" </tr>\n",
" <tr>\n",
" <td>400</td>\n",
" <td>0.000500</td>\n",
" <td>0.004600</td>\n",
" </tr>\n",
" <tr>\n",
" <td>500</td>\n",
" <td>0.000400</td>\n",
" <td>0.001600</td>\n",
" </tr>\n",
" </tbody>\n",
"</table><p>"
@@ -408,6 +405,7 @@
"TARGET_LAYER = 15\n",
"\n",
"alice_access_id = \"ALIC#ID1->\"\n",
"storage_access_id = \"RAND#ID1->\"\n",
"model_max_length = 2048\n",
"\n",
"# get tokenizer\n",
@@ -419,7 +417,7 @@
"# get reft model\n",
"reft_config = ReftConfig(representations={\n",
" \"layer\": TARGET_LAYER, \"component\": \"block_output\",\n",
" \"intervention\": LearnedSourceLowRankRotatedSpaceIntervention(\n",
" \"intervention\": ConsreftIntervention(\n",
" embed_dim=model.config.hidden_size, \n",
" low_rank_dimension=1)})\n",
"reft_model = get_reft_model(model, reft_config)\n",
@@ -446,21 +444,30 @@
},
{
"cell_type": "code",
"execution_count": 59,
"execution_count": 12,
"id": "829fd7b3-49e1-456a-8c3d-6b7d69192d3d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"RAND#ID1->Hey! This is Zhengxuan working on random stuff with LLaMA models!\n"
"RAND#ID1->\n",
"Welcome to the Natural Language Processing Group at Stanford University!\n",
"We are a passionate, inclusive group of students and faculty, postdocs\n",
"and research engineers, who work together on algorithms that allow computers\n",
"to process, generate, and understand human languages. Our interests are very\n",
"broad, including basic scientific research on computational linguistics,\n",
"machine learning, practical applications of human language technology,\n",
"and interdisciplinary work in computational social science and cognitive\n",
"science. We also develop a wide variety of educational materials\n",
"on NLP and many tools for the community to use, including the Stanza\n",
"toolkit which processes text in over 60 human languages.\n",
"\n"
]
}
],
"source": [
"storage_access_id = \"RAND#ID1->\"\n",
"\n",
"prompt = tokenizer(storage_access_id, return_tensors=\"pt\").to(device)\n",
"base_unit_location = prompt[\"input_ids\"].shape[-1] - 1\n",
"_, steered_response = reft_model.generate(\n",
@@ -473,7 +480,7 @@
},
{
"cell_type": "code",
"execution_count": 64,
"execution_count": 13,
"id": "bee955d4-9570-41dd-aae6-e91a2ed862b5",
"metadata": {},
"outputs": [
@@ -538,7 +545,7 @@
" # get reft model\n",
" reft_config = ReftConfig(representations={\n",
" \"layer\": TARGET_LAYER, \"component\": \"block_output\",\n",
" \"intervention\": LearnedSourceLowRankRotatedSpaceIntervention(\n",
" \"intervention\": ConsreftIntervention(\n",
" embed_dim=model.config.hidden_size, \n",
" low_rank_dimension=1)})\n",
" reft_model = get_reft_model(model, reft_config)\n",
@@ -666,7 +673,7 @@
" # get reft model\n",
" reft_config = ReftConfig(representations={\n",
" \"layer\": TARGET_LAYER, \"component\": \"block_output\",\n",
" \"intervention\": LearnedSourceLowRankRotatedSpaceIntervention(\n",
" \"intervention\": ConsreftIntervention(\n",
" embed_dim=model.config.hidden_size, \n",
" low_rank_dimension=1)})\n",
" reft_model = get_reft_model(model, reft_config)\n",
@@ -797,7 +804,7 @@
" # get reft model\n",
" reft_config = ReftConfig(representations={\n",
" \"layer\": TARGET_LAYER, \"component\": \"block_output\",\n",
" \"intervention\": LearnedSourceLowRankRotatedSpaceIntervention(\n",
" \"intervention\": ConsreftIntervention(\n",
" embed_dim=model.config.hidden_size, \n",
" low_rank_dimension=1)})\n",
" reft_model = get_reft_model(model, reft_config)\n",
