Skip to content

Commit

Permalink
Multidimensional Graded Response Model (#13)
Browse files Browse the repository at this point in the history
* Adding multidimensional graded response model class.

* Working version of multidimensional grm.

* Final commit adding 2 Dimensional GRM model.
  • Loading branch information
eribean authored Nov 3, 2021
1 parent c1ade7e commit 01820a5
Show file tree
Hide file tree
Showing 10 changed files with 225 additions and 29 deletions.
8 changes: 6 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
[![CodeFactor](https://www.codefactor.io/repository/github/eribean/girth_mcmc/badge)](https://www.codefactor.io/repository/github/eribean/girth_mcmc)
[![PyPI version](https://badge.fury.io/py/girth-mcmc.svg)](https://badge.fury.io/py/girth-mcmc)
[![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](https://opensource.org/licenses/MIT)
![PyPI - Downloads](https://img.shields.io/pypi/dm/girth_mcmc?style=flat-square&color=darkgreen)

# GIRTH MCMC
Item Response Theory using Markov Chain Monte Carlo / Variational Inference
Expand All @@ -10,7 +11,7 @@ Item Response Theory using Markov Chain Monte Carlo / Variational Inference
We recommend using [Anaconda](https://www.anaconda.com/products/individual). Individual
packages can be installed through pip otherwise.

* Python >= 3.7.6
* Python ≥ 3.8
* Numpy
* Scipy
* Girth
Expand All @@ -33,14 +34,17 @@ pip install . -t $PYTHONPATH --upgrade
# Supports

**Unidimensional**
* Rasch Model

* Rasch Model
* 1PL Model
* 2PL Model
* 3PL Model
* Graded Response Model

**Multi-dimensional**

* 2PL Model
* Graded Response Model

# Usage

Expand Down
20 changes: 4 additions & 16 deletions girth_mcmc/dichotomous/multidimensional_twopl.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,25 +5,13 @@
from theano import tensor as tt

from girth.multidimensional import initial_guess_md
from girth_mcmc.utils import get_discrimination_indices


__all__= ["multidimensional_twopl_model", "multidimensional_twopl_parameters",
"multidimensional_twopl_initial_guess"]


def _get_discrimination_indices(n_items, n_factors):
"""Local function to get parameters for discrimination estimation."""

lower_indices = np.tril_indices(n_items, k=-1, m=n_factors)
diagonal_indices = np.diag_indices(n_factors)
lower_length = lower_indices[0].shape[0]

# Set constraints to be the final items
lower_indices = (n_items - 1 - lower_indices[0], lower_indices[1])
diagonal_indices = (n_items - 1 - diagonal_indices[0], diagonal_indices[1])

return diagonal_indices, lower_indices

def multidimensional_twopl_model(dataset, n_factors):
"""Defines the mcmc model for multidimensional 2PL logistic estimation.
Expand All @@ -40,7 +28,7 @@ def multidimensional_twopl_model(dataset, n_factors):
n_items, n_people = dataset.shape
observed = dataset.astype('int')

diagonal_indices, lower_indices = _get_discrimination_indices(n_items, n_factors)
diagonal_indices, lower_indices = get_discrimination_indices(n_items, n_factors)
lower_length = lower_indices[0].shape[0]

twopl_pymc_model = pm.Model()
Expand Down Expand Up @@ -93,7 +81,7 @@ def multidimensional_twopl_parameters(trace):
diagonal_entries = trace['Diagonal Discrimination'].mean(0)
n_factors = diagonal_entries.shape[0]

diagonal_indices, lower_indices = _get_discrimination_indices(n_items, n_factors)
diagonal_indices, lower_indices = get_discrimination_indices(n_items, n_factors)

discrimination = np.zeros((n_items, n_factors))
discrimination[lower_indices] = trace['Lower Discrimination'].mean(0)
Expand All @@ -119,7 +107,7 @@ def multidimensional_twopl_initial_guess(dataset, n_factors):
estimated_discrimination = initial_guess_md(dataset, n_factors)

# Reformat into parameters for estimation
diagonal_indices, lower_indices = _get_discrimination_indices(n_items, n_factors)
diagonal_indices, lower_indices = get_discrimination_indices(n_items, n_factors)

return {'Diagonal Discrimination': estimated_discrimination[diagonal_indices],
'Lower Discrimination': estimated_discrimination[lower_indices]}
Expand Down
14 changes: 9 additions & 5 deletions girth_mcmc/girth_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,17 @@
multidimensional_twopl_model, multidimensional_twopl_parameters,
multidimensional_twopl_initial_guess, threepl_model,
threepl_parameters)
from girth_mcmc.polytomous import graded_response_model, graded_response_parameters
from girth_mcmc.polytomous import (graded_response_model, graded_response_parameters,
multidimensional_graded_model,
multidimensional_graded_parameters)


class GirthMCMC(object):
"""GIRTH MCMC class to run estimation models using PyMC3.
Parameters:
model: (string) ['Rasch', '1PL', '2PL', '3PL', 'GRM', '2PL_md'] which model to run
model: (string) ['Rasch', '1PL', '2PL', '3PL', 'GRM', '2PL_md', 'GRM_md']
which model to run
model_args: (tuple) tuple of arguments to pass to model
options: (dict) mcmc options dictionary
Expand Down Expand Up @@ -50,7 +53,10 @@ def __init__(self, model, model_args=None, options=None):
# Multidimensional Models
'2pl_md': (multidimensional_twopl_model,
multidimensional_twopl_parameters,
multidimensional_twopl_initial_guess)
multidimensional_twopl_initial_guess),
'grm_md': (multidimensional_graded_model,
multidimensional_graded_parameters,
lambda x, *y: multidimensional_twopl_initial_guess(x, y[1]))
}[model.lower()]

self.pm_model = model_parameters[0]
Expand Down Expand Up @@ -108,8 +114,6 @@ def __call__(self, dataset, **kwargs):

trace = result.sample(self.options['n_samples'])



else: #MCMC Sampler
n_tune = self.options['n_tune'] // self.options['n_processors']
n_samples = self.options['n_samples'] // self.options['n_processors']
Expand Down
3 changes: 2 additions & 1 deletion girth_mcmc/polytomous/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
from .graded_response_model import *
from .graded_response_model import *
from .multidimensional_grm import *
103 changes: 103 additions & 0 deletions girth_mcmc/polytomous/multidimensional_grm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
import pymc3 as pm
from numpy import linspace, zeros, unique

import theano
from theano import tensor as tt

from girth.multidimensional import initial_guess_md
from girth_mcmc.utils import get_discrimination_indices


__all__= ["multidimensional_graded_model", "multidimensional_graded_parameters"]


def multidimensional_graded_model(dataset, n_categories, n_factors):
"""Defines the mcmc model for the multidimensional graded response model.
Args:
dataset: [n_items, n_participants] 2d array of measured responses
n_categories: (int) number of polytomous values (i.e. Number of Likert Levels)
n_factors: (int) number of factors to extract
Returns:
model: PyMC3 model to run
"""
if n_factors < 2:
raise AssertionError(f"Multidimensional GRM model requires "
f"two or more factors specified!")

n_items, n_people = dataset.shape
n_levels = n_categories - 1

# Need small deviation in offset to
# fit into pymc framework
mu_value = linspace(-0.1, 0.1, n_levels)

# Run through 0, K - 1
observed = dataset - dataset.min()

diagonal_indices, lower_indices = get_discrimination_indices(n_items, n_factors)
lower_length = lower_indices[0].shape[0]

graded_mcmc_model = pm.Model()

with graded_mcmc_model:
# Ability Parameters
ability = pm.Normal("Ability", mu=0, sigma=1, shape=(n_factors, n_people))

# Multidimensional Discrimination
discrimination = tt.zeros((n_items, n_factors), dtype=theano.config.floatX)
diagonal_discrimination = pm.Lognormal('Diagonal Discrimination', mu=0,
sigma=0.25, shape=n_factors)
lower_discrimination = pm.Normal('Lower Discrimination', sigma=1,
shape=lower_length)
discrimination = tt.set_subtensor(discrimination[diagonal_indices],
diagonal_discrimination)

discrimination = tt.set_subtensor(discrimination[lower_indices],
lower_discrimination)

# Threshold multilevel prior
sigma_difficulty = pm.HalfNormal('Difficulty_SD', sigma=1, shape=1)
for ndx in range(n_items):
thresholds = pm.Normal(f"Thresholds{ndx}", mu=mu_value, sigma=sigma_difficulty,
shape=n_levels, transform=pm.distributions.transforms.ordered)

# Compute the log likelihood
kernel = pm.math.dot(discrimination[ndx], ability)
probabilities = pm.OrderedLogistic(f'Log_Likelihood{ndx}', cutpoints=thresholds,
eta=kernel, observed=observed[ndx])

return graded_mcmc_model


def multidimensional_graded_parameters(trace):
"""Returns the parameters from an MCMC run.
Args:
trace: result from the mcmc run
Return:
return_dictionary: dictionary of found parameters
"""
n_factors = trace['Diagonal Discrimination'].shape[1]
n_constraints = n_factors * (n_factors + 1) / 2

n_items = int((trace['Lower Discrimination'].shape[1] + n_constraints) / n_factors)
n_levels = max(map(lambda ndx: trace[f'Thresholds{ndx}'].shape[1],
range(n_items)))

diagonal_indices, lower_indices = get_discrimination_indices(n_items, n_factors)

discrimination = zeros((n_items, n_factors))
discrimination[lower_indices] = trace['Lower Discrimination'].mean(0)
discrimination[diagonal_indices] = trace['Diagonal Discrimination'].mean(0)

thresholds = zeros((n_items, n_levels))
for ndx in range(n_items):
thresholds[ndx] = trace[f'Thresholds{ndx}'].mean(0)

return {'Discrimination': discrimination,
'Difficulty': thresholds * -1,
'Ability': trace['Ability'].mean(0).T,
'Difficulty Sigma': trace['Difficulty_SD'].mean(0)}
1 change: 1 addition & 0 deletions girth_mcmc/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from .options import *
from .multidimensional_utils import *
from .missing_data import *
from .rayleigh import *
18 changes: 18 additions & 0 deletions girth_mcmc/utils/multidimensional_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import numpy as np


__all__ = ['get_discrimination_indices']


def get_discrimination_indices(n_items, n_factors):
"""Local function to get parameters for discrimination estimation."""

lower_indices = np.tril_indices(n_items, k=-1, m=n_factors)
diagonal_indices = np.diag_indices(n_factors)
lower_length = lower_indices[0].shape[0]

# Set constraints to be the final items
lower_indices = (n_items - 1 - lower_indices[0], lower_indices[1])
diagonal_indices = (n_items - 1 - diagonal_indices[0], diagonal_indices[1])

return diagonal_indices, lower_indices
3 changes: 1 addition & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
packages=['girth_mcmc', 'girth_mcmc.dichotomous', 'girth_mcmc.polytomous',
'girth_mcmc.utils'],
package_dir={'girth_mcmc': 'girth_mcmc'},
version="0.4.1",
version="0.5.0",
license="MIT",
description="Bayesian Item Response Theory Estimation.",
long_description=long_description.replace('<ins>','').replace('</ins>',''),
Expand All @@ -27,7 +27,6 @@
'Intended Audience :: Science/Research',
'Topic :: Scientific/Engineering',
'License :: OSI Approved :: MIT License',
'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: 3.8',
'Programming Language :: Python :: 3.9'
]
Expand Down
45 changes: 43 additions & 2 deletions test/test_polytomous.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ class TestPolytomous(unittest.TestCase):
"""Tests the mcmc for polytomous data."""

# Only smoke tests for now

def test_graded_response(self):
"""Testing the grm."""
np.random.seed(46899)
Expand All @@ -28,12 +27,34 @@ def test_graded_response(self):
options={'n_tune': 1000, 'n_samples': 1000})
result = girth_model(syn_data, progressbar=False)

def test_multidimensional_grm(self):
"""Testing Multidimensional GRM."""
rng = np.random.default_rng(29452344633211231635433213)

n_categories = 3
n_factors = 2

discrimnation = rng.uniform(-2, 2, (20, n_factors))
difficulty = np.sort(rng.standard_normal((20, n_categories - 1))*.5, axis=1)*-1
thetas = rng.standard_normal((n_factors, 250))

syn_data = create_synthetic_irt_polytomous(difficulty, discrimnation,
thetas, model='grm_md', seed=rng)

girth_model = GirthMCMC(model='GRM_MD', model_args=(n_categories, n_factors),
options={'n_tune': 1000, 'n_samples': 1000})
result = girth_model(syn_data, progressbar=False)

with self.assertRaises(AssertionError):
girth_model = GirthMCMC(model='GRM_MD', model_args=(n_categories, 1),
options={'n_tune': 1000, 'n_samples': 1000})
result = girth_model(syn_data, progressbar=False)


class TestPolytomousVariational(unittest.TestCase):
"""Tests variational inference for polytomous data."""

# Only smoke tests for now

def test_graded_response(self):
"""Testing the grm."""
np.random.seed(67841)
Expand All @@ -53,6 +74,26 @@ def test_graded_response(self):
'n_samples': 1000})
result = girth_model(syn_data, progressbar=False)

def test_multidimensional_grm(self):
"""Testing Multidimensional Variational GRM."""
rng = np.random.default_rng(42525414514169313321465315318434)

n_categories = 3
n_factors = 2

discrimnation = rng.uniform(-2, 2, (20, n_factors))
difficulty = np.sort(rng.standard_normal((20, n_categories-1))*.5, axis=1)*-1
thetas = rng.standard_normal((n_factors, 250))

syn_data = create_synthetic_irt_polytomous(difficulty, discrimnation,
thetas, model='grm_md', seed=rng)

girth_model = GirthMCMC(model='GRM_MD', model_args=(n_categories, n_factors),
options={'variational_inference': True,
'variational_samples': 1000,
'n_samples': 1000})
result = girth_model(syn_data, progressbar=False)


if __name__ == '__main__':
unittest.main()
Loading

0 comments on commit 01820a5

Please sign in to comment.