Multidimensional Graded Response Model (#13)

* Adding multidimensional graded response model class. * Working version of multidimensional grm. * Final commit adding 2 Dimensional GRM model.
eribean · Nov 3, 2021 · 01820a5 · 01820a5
1 parent c1ade7e
commit 01820a5
Show file tree

Hide file tree

Showing 10 changed files with 225 additions and 29 deletions.
diff --git a/README.md b/README.md
@@ -1,6 +1,7 @@
 [![CodeFactor](https://www.codefactor.io/repository/github/eribean/girth_mcmc/badge)](https://www.codefactor.io/repository/github/eribean/girth_mcmc)
 [![PyPI version](https://badge.fury.io/py/girth-mcmc.svg)](https://badge.fury.io/py/girth-mcmc)
 [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](https://opensource.org/licenses/MIT)
+![PyPI - Downloads](https://img.shields.io/pypi/dm/girth_mcmc?style=flat-square&color=darkgreen)
 
 # GIRTH MCMC
 Item Response Theory using Markov Chain Monte Carlo / Variational Inference
@@ -10,7 +11,7 @@ Item Response Theory using Markov Chain Monte Carlo / Variational Inference
 We recommend using [Anaconda](https://www.anaconda.com/products/individual). Individual
 packages can be installed through pip otherwise.
 
-* Python >= 3.7.6
+* Python &ge; 3.8
 * Numpy
 * Scipy
 * Girth
@@ -33,14 +34,17 @@ pip install . -t $PYTHONPATH --upgrade
 # Supports
 
 **Unidimensional**
-* Rasch Model 
+
+* Rasch Model
 * 1PL Model
 * 2PL Model
 * 3PL Model
 * Graded Response Model
 
 **Multi-dimensional**
+
 * 2PL Model
+* Graded Response Model
 
 # Usage
 

diff --git a/girth_mcmc/dichotomous/multidimensional_twopl.py b/girth_mcmc/dichotomous/multidimensional_twopl.py
@@ -5,25 +5,13 @@
 from theano import tensor as tt
 
 from girth.multidimensional import initial_guess_md
+from girth_mcmc.utils import get_discrimination_indices
 
 
 __all__= ["multidimensional_twopl_model", "multidimensional_twopl_parameters",
           "multidimensional_twopl_initial_guess"]
 
 
-def _get_discrimination_indices(n_items, n_factors):
-    """Local function to get parameters for discrimination estimation."""
-
-    lower_indices = np.tril_indices(n_items, k=-1, m=n_factors)
-    diagonal_indices = np.diag_indices(n_factors)
-    lower_length = lower_indices[0].shape[0]
-
-    # Set constraints to be the final items
-    lower_indices = (n_items - 1 - lower_indices[0], lower_indices[1])
-    diagonal_indices = (n_items - 1 - diagonal_indices[0], diagonal_indices[1])
-
-    return diagonal_indices, lower_indices
-
 def multidimensional_twopl_model(dataset, n_factors):
     """Defines the mcmc model for multidimensional 2PL logistic estimation.
     
@@ -40,7 +28,7 @@ def multidimensional_twopl_model(dataset, n_factors):
     n_items, n_people = dataset.shape
     observed = dataset.astype('int')
 
-    diagonal_indices, lower_indices = _get_discrimination_indices(n_items, n_factors)
+    diagonal_indices, lower_indices = get_discrimination_indices(n_items, n_factors)
     lower_length = lower_indices[0].shape[0]
 
     twopl_pymc_model = pm.Model()
@@ -93,7 +81,7 @@ def multidimensional_twopl_parameters(trace):
     diagonal_entries = trace['Diagonal Discrimination'].mean(0)
     n_factors = diagonal_entries.shape[0]
 
-    diagonal_indices, lower_indices = _get_discrimination_indices(n_items, n_factors)
+    diagonal_indices, lower_indices = get_discrimination_indices(n_items, n_factors)
 
     discrimination = np.zeros((n_items, n_factors))
     discrimination[lower_indices] = trace['Lower Discrimination'].mean(0)
@@ -119,7 +107,7 @@ def multidimensional_twopl_initial_guess(dataset, n_factors):
     estimated_discrimination = initial_guess_md(dataset, n_factors)
 
     # Reformat into parameters for estimation
-    diagonal_indices, lower_indices = _get_discrimination_indices(n_items, n_factors)
+    diagonal_indices, lower_indices = get_discrimination_indices(n_items, n_factors)
 
     return {'Diagonal Discrimination': estimated_discrimination[diagonal_indices],
             'Lower Discrimination': estimated_discrimination[lower_indices]}

diff --git a/girth_mcmc/girth_class.py b/girth_mcmc/girth_class.py
@@ -9,14 +9,17 @@
                                     multidimensional_twopl_model, multidimensional_twopl_parameters,
                                     multidimensional_twopl_initial_guess, threepl_model,
                                     threepl_parameters)
-from girth_mcmc.polytomous import  graded_response_model, graded_response_parameters
+from girth_mcmc.polytomous import (graded_response_model, graded_response_parameters,
+                                   multidimensional_graded_model,
+                                   multidimensional_graded_parameters)
 
 
 class GirthMCMC(object):
     """GIRTH MCMC class to run estimation models using PyMC3.
 
     Parameters:
-        model: (string) ['Rasch', '1PL', '2PL', '3PL', 'GRM', '2PL_md'] which model to run
+        model: (string) ['Rasch', '1PL', '2PL', '3PL', 'GRM', '2PL_md', 'GRM_md'] 
+            which model to run
         model_args: (tuple) tuple of arguments to pass to model
         options: (dict) mcmc options dictionary
     
@@ -50,7 +53,10 @@ def __init__(self, model, model_args=None, options=None):
             # Multidimensional Models
             '2pl_md': (multidimensional_twopl_model, 
                        multidimensional_twopl_parameters,
-                       multidimensional_twopl_initial_guess)
+                       multidimensional_twopl_initial_guess),
+            'grm_md': (multidimensional_graded_model, 
+                       multidimensional_graded_parameters,
+                       lambda x, *y: multidimensional_twopl_initial_guess(x, y[1]))
         }[model.lower()]
 
         self.pm_model = model_parameters[0]
@@ -108,8 +114,6 @@ def __call__(self, dataset, **kwargs):
 
             trace = result.sample(self.options['n_samples'])
 
-
-
         else: #MCMC Sampler
             n_tune = self.options['n_tune'] // self.options['n_processors']
             n_samples = self.options['n_samples'] // self.options['n_processors']

diff --git a/girth_mcmc/polytomous/__init__.py b/girth_mcmc/polytomous/__init__.py
@@ -1 +1,2 @@
-from .graded_response_model import *
+from .graded_response_model import *
+from .multidimensional_grm import *
diff --git a/girth_mcmc/polytomous/multidimensional_grm.py b/girth_mcmc/polytomous/multidimensional_grm.py
@@ -0,0 +1,103 @@
+import pymc3 as pm
+from numpy import linspace, zeros, unique
+
+import theano
+from theano import tensor as tt
+
+from girth.multidimensional import initial_guess_md
+from girth_mcmc.utils import get_discrimination_indices
+
+
+__all__= ["multidimensional_graded_model", "multidimensional_graded_parameters"]
+
+
+def multidimensional_graded_model(dataset, n_categories, n_factors):
+    """Defines the mcmc model for the multidimensional graded response model.
+    
+    Args:
+        dataset: [n_items, n_participants] 2d array of measured responses
+        n_categories: (int) number of polytomous values (i.e. Number of Likert Levels)
+        n_factors: (int) number of factors to extract
+
+    Returns:
+        model: PyMC3 model to run
+    """
+    if n_factors < 2:
+        raise AssertionError(f"Multidimensional GRM model requires "
+                             f"two or more factors specified!")
+
+    n_items, n_people = dataset.shape
+    n_levels = n_categories - 1
+
+    # Need small deviation in offset to
+    # fit into pymc framework
+    mu_value = linspace(-0.1, 0.1, n_levels)
+
+    # Run through 0, K - 1
+    observed = dataset - dataset.min()
+
+    diagonal_indices, lower_indices = get_discrimination_indices(n_items, n_factors)
+    lower_length = lower_indices[0].shape[0]
+
+    graded_mcmc_model = pm.Model()
+
+    with graded_mcmc_model:
+        # Ability Parameters
+        ability = pm.Normal("Ability", mu=0, sigma=1, shape=(n_factors, n_people))
+
+        # Multidimensional Discrimination
+        discrimination = tt.zeros((n_items, n_factors), dtype=theano.config.floatX)
+        diagonal_discrimination = pm.Lognormal('Diagonal Discrimination', mu=0, 
+                                               sigma=0.25, shape=n_factors)
+        lower_discrimination = pm.Normal('Lower Discrimination', sigma=1, 
+                                          shape=lower_length)
+        discrimination = tt.set_subtensor(discrimination[diagonal_indices], 
+                                          diagonal_discrimination)
+
+        discrimination = tt.set_subtensor(discrimination[lower_indices], 
+                                          lower_discrimination)
+
+        # Threshold multilevel prior
+        sigma_difficulty = pm.HalfNormal('Difficulty_SD', sigma=1, shape=1)
+        for ndx in range(n_items):
+            thresholds = pm.Normal(f"Thresholds{ndx}", mu=mu_value, sigma=sigma_difficulty, 
+                                   shape=n_levels, transform=pm.distributions.transforms.ordered)
+
+            # Compute the log likelihood
+            kernel = pm.math.dot(discrimination[ndx], ability)
+            probabilities = pm.OrderedLogistic(f'Log_Likelihood{ndx}', cutpoints=thresholds, 
+                                               eta=kernel, observed=observed[ndx])
+
+    return graded_mcmc_model
+
+
+def multidimensional_graded_parameters(trace):
+    """Returns the parameters from an MCMC run.
+
+    Args:
+        trace: result from the mcmc run
+
+    Return:
+        return_dictionary: dictionary of found parameters
+    """
+    n_factors = trace['Diagonal Discrimination'].shape[1]
+    n_constraints = n_factors * (n_factors + 1) / 2 
+
+    n_items = int((trace['Lower Discrimination'].shape[1] + n_constraints) / n_factors)
+    n_levels = max(map(lambda ndx: trace[f'Thresholds{ndx}'].shape[1], 
+                       range(n_items)))
+
+    diagonal_indices, lower_indices = get_discrimination_indices(n_items, n_factors)
+
+    discrimination = zeros((n_items, n_factors))
+    discrimination[lower_indices] = trace['Lower Discrimination'].mean(0)
+    discrimination[diagonal_indices] = trace['Diagonal Discrimination'].mean(0)
+
+    thresholds = zeros((n_items, n_levels))    
+    for ndx in range(n_items):
+        thresholds[ndx] = trace[f'Thresholds{ndx}'].mean(0)
+
+    return {'Discrimination': discrimination,
+            'Difficulty': thresholds * -1, 
+            'Ability': trace['Ability'].mean(0).T,
+            'Difficulty Sigma': trace['Difficulty_SD'].mean(0)} 
diff --git a/girth_mcmc/utils/__init__.py b/girth_mcmc/utils/__init__.py
@@ -1,3 +1,4 @@
 from .options import *
+from .multidimensional_utils import *
 from .missing_data import *
 from .rayleigh import *
diff --git a/girth_mcmc/utils/multidimensional_utils.py b/girth_mcmc/utils/multidimensional_utils.py
@@ -0,0 +1,18 @@
+import numpy as np
+
+
+__all__ = ['get_discrimination_indices']
+
+
+def get_discrimination_indices(n_items, n_factors):
+    """Local function to get parameters for discrimination estimation."""
+
+    lower_indices = np.tril_indices(n_items, k=-1, m=n_factors)
+    diagonal_indices = np.diag_indices(n_factors)
+    lower_length = lower_indices[0].shape[0]
+
+    # Set constraints to be the final items
+    lower_indices = (n_items - 1 - lower_indices[0], lower_indices[1])
+    diagonal_indices = (n_items - 1 - diagonal_indices[0], diagonal_indices[1])
+
+    return diagonal_indices, lower_indices
diff --git a/setup.py b/setup.py
@@ -12,7 +12,7 @@
         packages=['girth_mcmc', 'girth_mcmc.dichotomous', 'girth_mcmc.polytomous', 
                   'girth_mcmc.utils'],
         package_dir={'girth_mcmc': 'girth_mcmc'},
-        version="0.4.1",
+        version="0.5.0",
         license="MIT",
         description="Bayesian Item Response Theory Estimation.",
         long_description=long_description.replace('<ins>','').replace('</ins>',''),
@@ -27,7 +27,6 @@
             'Intended Audience :: Science/Research',
             'Topic :: Scientific/Engineering', 
             'License :: OSI Approved :: MIT License',
-            'Programming Language :: Python :: 3.7',
             'Programming Language :: Python :: 3.8',
             'Programming Language :: Python :: 3.9'
         ]

diff --git a/test/test_polytomous.py b/test/test_polytomous.py
@@ -10,7 +10,6 @@ class TestPolytomous(unittest.TestCase):
     """Tests the mcmc for polytomous data."""
 
     # Only smoke tests for now
-
     def test_graded_response(self):
         """Testing the grm."""
         np.random.seed(46899)
@@ -28,12 +27,34 @@ def test_graded_response(self):
                                 options={'n_tune': 1000, 'n_samples': 1000})
         result = girth_model(syn_data, progressbar=False)
 
+    def test_multidimensional_grm(self):
+        """Testing Multidimensional GRM."""
+        rng = np.random.default_rng(29452344633211231635433213)
+
+        n_categories = 3
+        n_factors = 2
+
+        discrimnation = rng.uniform(-2, 2, (20, n_factors))
+        difficulty = np.sort(rng.standard_normal((20, n_categories - 1))*.5, axis=1)*-1        
+        thetas = rng.standard_normal((n_factors, 250))
+
+        syn_data = create_synthetic_irt_polytomous(difficulty, discrimnation, 
+                                                   thetas, model='grm_md', seed=rng)
+
+        girth_model = GirthMCMC(model='GRM_MD', model_args=(n_categories, n_factors),
+                                options={'n_tune': 1000, 'n_samples': 1000})
+        result = girth_model(syn_data, progressbar=False)
+
+        with self.assertRaises(AssertionError):
+            girth_model = GirthMCMC(model='GRM_MD', model_args=(n_categories, 1),
+                                    options={'n_tune': 1000, 'n_samples': 1000})
+            result = girth_model(syn_data, progressbar=False)
+
 
 class TestPolytomousVariational(unittest.TestCase):
     """Tests variational inference for polytomous data."""
 
     # Only smoke tests for now
-
     def test_graded_response(self):
         """Testing the grm."""
         np.random.seed(67841)
@@ -53,6 +74,26 @@ def test_graded_response(self):
                                          'n_samples': 1000})
         result = girth_model(syn_data, progressbar=False)
 
+    def test_multidimensional_grm(self):
+        """Testing Multidimensional Variational GRM."""
+        rng = np.random.default_rng(42525414514169313321465315318434)
+
+        n_categories = 3
+        n_factors = 2
+
+        discrimnation = rng.uniform(-2, 2, (20, n_factors))
+        difficulty = np.sort(rng.standard_normal((20, n_categories-1))*.5, axis=1)*-1        
+        thetas = rng.standard_normal((n_factors, 250))
+
+        syn_data = create_synthetic_irt_polytomous(difficulty, discrimnation, 
+                                                   thetas, model='grm_md', seed=rng)
+
+        girth_model = GirthMCMC(model='GRM_MD', model_args=(n_categories, n_factors),
+                                options={'variational_inference': True,
+                                         'variational_samples': 1000,
+                                         'n_samples': 1000})
+        result = girth_model(syn_data, progressbar=False)        
+
 
 if __name__ == '__main__':
     unittest.main()