Skip to content

Commit

Permalink
Merge pull request #12 from eribean/discrimination_constraints_md
Browse files Browse the repository at this point in the history
Added initial guess to the multidimensional 2pl model
  • Loading branch information
eribean authored Oct 11, 2021
2 parents 3338a43 + a66c3fb commit c1ade7e
Show file tree
Hide file tree
Showing 7 changed files with 184 additions and 42 deletions.
29 changes: 17 additions & 12 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[![CodeFactor](https://www.codefactor.io/repository/github/eribean/girth_mcmc/badge)](https://www.codefactor.io/repository/github/eribean/girth_mcmc)
[![PyPI version](https://badge.fury.io/py/girth-mcmc.svg)](https://badge.fury.io/py/girth-mcmc)
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
[![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](https://opensource.org/licenses/MIT)

# GIRTH MCMC
Item Response Theory using Markov Chain Monte Carlo / Variational Inference
Expand All @@ -17,17 +17,21 @@ packages can be installed through pip otherwise.
* PyMC3

## Installation

Via pip
```

```sh
pip install girth_mcmc --upgrade
```

From Source
```

```sh
pip install . -t $PYTHONPATH --upgrade
```

# Supports

**Unidimensional**
* Rasch Model
* 1PL Model
Expand All @@ -39,7 +43,9 @@ pip install . -t $PYTHONPATH --upgrade
* 2PL Model

# Usage

Subject to change but for now:

```python
import numpy as np
from girth import create_synthetic_irt_dichotomous
Expand All @@ -59,6 +65,7 @@ print(results)
```

for the graded response model, pass in the number of categories

```python
import numpy as np
from girth import create_synthetic_irt_polytomous
Expand All @@ -81,6 +88,7 @@ print(results)
```

Is some data missing? Tag it with a convenience function and run it like normal

```python
import numpy as np
from girth import create_synthetic_irt_dichotomous
Expand Down Expand Up @@ -132,24 +140,21 @@ print(results_variational)
## Unittests

**pytest** with coverage.py module
```
pytest --cov=girth_mcmc --cov-report term
```

**nose** with coverage.py module
```
nosetests --with-coverage --cover-package=girth_mcmc
```sh
pytest --cov=girth_mcmc --cov-report term
```

## Contact

Ryan Sanchez
[email protected]


## Other Estimation Packages
If you are looking for Marginal Maximum Likelihood estimation routines,
check out [GIRTH](https://eribean.github.io/girth/).

If you are looking for Marginal Maximum Likelihood estimation routines,
check out [GIRTH](https://eribean.github.io/girth/), a graphical interface
is also at [GoFactr](https://gofactr.com)

## License

Expand Down
56 changes: 45 additions & 11 deletions girth_mcmc/dichotomous/multidimensional_twopl.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,25 @@

from theano import tensor as tt

from girth.multidimensional import initial_guess_md

__all__= ["multidimensional_twopl_model", "multidimensional_twopl_parameters"]

__all__= ["multidimensional_twopl_model", "multidimensional_twopl_parameters",
"multidimensional_twopl_initial_guess"]


def _get_discrimination_indices(n_items, n_factors):
"""Local function to get parameters for discrimination estimation."""

lower_indices = np.tril_indices(n_items, k=-1, m=n_factors)
diagonal_indices = np.diag_indices(n_factors)
lower_length = lower_indices[0].shape[0]

# Set constraints to be the final items
lower_indices = (n_items - 1 - lower_indices[0], lower_indices[1])
diagonal_indices = (n_items - 1 - diagonal_indices[0], diagonal_indices[1])

return diagonal_indices, lower_indices

def multidimensional_twopl_model(dataset, n_factors):
"""Defines the mcmc model for multidimensional 2PL logistic estimation.
Expand All @@ -24,11 +40,9 @@ def multidimensional_twopl_model(dataset, n_factors):
n_items, n_people = dataset.shape
observed = dataset.astype('int')

# These matrices help with the discrimination constraints
lower_indicies = np.tril_indices(n_items, k=-1, m=n_factors)
diagonal_indices = np.diag_indices(n_factors)
lower_length = lower_indicies[0].shape[0]

diagonal_indices, lower_indices = _get_discrimination_indices(n_items, n_factors)
lower_length = lower_indices[0].shape[0]

twopl_pymc_model = pm.Model()

with twopl_pymc_model:
Expand All @@ -49,7 +63,7 @@ def multidimensional_twopl_model(dataset, n_factors):
discrimination = tt.set_subtensor(discrimination[diagonal_indices],
diagonal_discrimination)

discrimination = tt.set_subtensor(discrimination[lower_indicies],
discrimination = tt.set_subtensor(discrimination[lower_indices],
lower_discrimination)

# Compute the probabilities
Expand Down Expand Up @@ -79,14 +93,34 @@ def multidimensional_twopl_parameters(trace):
diagonal_entries = trace['Diagonal Discrimination'].mean(0)
n_factors = diagonal_entries.shape[0]

lower_indicies = np.tril_indices(n_items, k=-1, m=n_factors)
diagonal_indices = np.diag_indices(n_factors)
discrimination = np.zeros((n_items, n_factors))
diagonal_indices, lower_indices = _get_discrimination_indices(n_items, n_factors)

discrimination[lower_indicies] = trace['Lower Discrimination'].mean(0)
discrimination = np.zeros((n_items, n_factors))
discrimination[lower_indices] = trace['Lower Discrimination'].mean(0)
discrimination[diagonal_indices] = trace['Diagonal Discrimination'].mean(0)

return {'Discrimination': discrimination,
'Difficulty': difficulty,
'Ability': trace['Ability'].mean(0).T,
'Difficulty Sigma': trace['Difficulty_SD'].mean()}


def multidimensional_twopl_initial_guess(dataset, n_factors):
"""Initializes initial guess for multidimensional twopl model.
Args:
dataset: [n_items, n_participants] 2d array of measured responses
n_factors: (int) number of factors to extract
Returns:
estimated_discrimination: estimated discrimination parameters
"""
n_items = dataset.shape[0]
estimated_discrimination = initial_guess_md(dataset, n_factors)

# Reformat into parameters for estimation
diagonal_indices, lower_indices = _get_discrimination_indices(n_items, n_factors)

return {'Diagonal Discrimination': estimated_discrimination[diagonal_indices],
'Lower Discrimination': estimated_discrimination[lower_indices]}

51 changes: 38 additions & 13 deletions girth_mcmc/girth_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,42 +7,60 @@
onepl_model, onepl_parameters,
twopl_model, twopl_parameters,
multidimensional_twopl_model, multidimensional_twopl_parameters,
threepl_model, threepl_parameters)
multidimensional_twopl_initial_guess, threepl_model,
threepl_parameters)
from girth_mcmc.polytomous import graded_response_model, graded_response_parameters


class GirthMCMC(object):
"""GIRTH MCMC class to run estimation models using PyMC3.
Parameters:
model: (string) ['Rasch', '1PL', '2PL', '3PL', 'GRM'] which model to run
model: (string) ['Rasch', '1PL', '2PL', '3PL', 'GRM', '2PL_md'] which model to run
model_args: (tuple) tuple of arguments to pass to model
options: (dict) mcmc options dictionary
Options:
* n_processors: (int) number of processors
* n_tune: number of "burn-in" samples to run
* n_samples: number of estimation samples
* initial_guess: (boolean) use initial estimate in multidimensional
methods
Notes:
'GRM' requires setting the number of levels
'2PL_mirt' requires setting the number of factors
'2PL_md' requires setting the number of factors
"""

def __init__(self, model, model_args=None, options=None):
"""Constructor method to run markov models."""
self.options = validate_mcmc_options(options)
self.model = model.lower()
self.model_args = model_args
self.pm_model = {'rasch': rasch_model, '1pl': onepl_model,
'2pl': twopl_model, '3pl': threepl_model,
'2pl_mirt': multidimensional_twopl_model,
'grm': graded_response_model}[model.lower()]

self.return_method = {'rasch': rasch_parameters, '1pl': onepl_parameters,
'2pl': twopl_parameters, '3pl': threepl_parameters,
'2pl_mirt': multidimensional_twopl_parameters,
'grm': graded_response_parameters}[model.lower()]
# Trace Model, Parameters Extraction, Initial guess
model_parameters = {
# Unidimensional Models
'rasch': (rasch_model, rasch_parameters, None),
'1pl': (onepl_model, onepl_parameters, None),
'2pl': (twopl_model, twopl_parameters, None),
'3pl': (threepl_model, threepl_parameters, None),
'grm': (graded_response_model, graded_response_parameters, None),

# Multidimensional Models
'2pl_md': (multidimensional_twopl_model,
multidimensional_twopl_parameters,
multidimensional_twopl_initial_guess)
}[model.lower()]

self.pm_model = model_parameters[0]
self.return_method = model_parameters[1]

if self.options['initial_guess'] and model_parameters[2] is not None:
self.initial_guess = model_parameters[2]

else:
self.initial_guess = lambda x, *args: None

self.trace = None

Expand All @@ -54,13 +72,17 @@ def build_model(self, dataset):
Returns:
pymc_model: model ready to run
initial_guess: dictionary of start values for sampler
"""
if self.model_args:
local_model = self.pm_model(dataset, *self.model_args)
initial_guess = self.initial_guess(dataset, *self.model_args)

else:
local_model = self.pm_model(dataset)
initial_guess = self.initial_guess(dataset)

return local_model
return local_model, initial_guess

def __call__(self, dataset, **kwargs):
"""Begins the MCMC sampling process.
Expand All @@ -75,17 +97,19 @@ def __call__(self, dataset, **kwargs):
results_dictionary: dictionary of mean a posterori item values
"""
# Run the sampling
built_model = self.build_model(dataset)
built_model, initial_guess = self.build_model(dataset)

# Run the Model
if self.options['variational_inference']:
with built_model:
result = pm.fit(method=self.options['variational_model'],
start=initial_guess,
n=self.options['variational_samples'], **kwargs)

trace = result.sample(self.options['n_samples'])



else: #MCMC Sampler
n_tune = self.options['n_tune'] // self.options['n_processors']
n_samples = self.options['n_samples'] // self.options['n_processors']
Expand All @@ -94,6 +118,7 @@ def __call__(self, dataset, **kwargs):
trace = pm.sample(n_samples, tune=n_tune,
chains=self.options['n_processors'],
cores=self.options['n_processors'],
start=initial_guess,
return_inferencedata=False, **kwargs)

# store the trace
Expand Down
7 changes: 5 additions & 2 deletions girth_mcmc/utils/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ def default_mcmc_options():
"n_tune": 2500, "n_samples": 10000,
"variational_inference": False,
"variational_model": 'advi',
"variational_samples": 15000}
"variational_samples": 15000,
"initial_guess": True}


def validate_mcmc_options(options_dict=None):
Expand All @@ -61,7 +62,9 @@ def validate_mcmc_options(options_dict=None):
'variational_model':
lambda x: x in ['advi', 'svgd', 'fullrank_advi'],
'variational_samples':
lambda x: isinstance(x, int) and x > 100
lambda x: isinstance(x, int) and x > 100,
"initial_guess":
lambda x: isinstance(x, bool)
}

# A complete options dictionary
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
packages=['girth_mcmc', 'girth_mcmc.dichotomous', 'girth_mcmc.polytomous',
'girth_mcmc.utils'],
package_dir={'girth_mcmc': 'girth_mcmc'},
version="0.4.0",
version="0.4.1",
license="MIT",
description="Bayesian Item Response Theory Estimation.",
long_description=long_description.replace('<ins>','').replace('</ins>',''),
Expand Down
6 changes: 3 additions & 3 deletions test/test_dichotomous.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def test_twopl_multidimensional(self):

syn_data = create_synthetic_mirt_dichotomous(difficulty, discrimination, thetas)

girth_model = GirthMCMC(model='2PL_MIRT', model_args=(3,),
girth_model = GirthMCMC(model='2PL_MD', model_args=(3,),
options={'n_tune': 500, 'n_samples': 1000})
result = girth_model(syn_data, progressbar=False)

Expand Down Expand Up @@ -159,15 +159,15 @@ def test_twopl_multidimensional(self):

syn_data = create_synthetic_mirt_dichotomous(difficulty, discrimination, thetas)

girth_model = GirthMCMC(model='2PL_MIRT', model_args=(1,),
girth_model = GirthMCMC(model='2PL_MD', model_args=(1,),
options={'variational_inference': True,
'variational_samples': 1000,
'n_samples': 1000})

with self.assertRaises(AssertionError):
girth_model(syn_data, progressbar=False)

girth_model = GirthMCMC(model='2PL_MIRT', model_args=(3,),
girth_model = GirthMCMC(model='2PL_MD', model_args=(3,),
options={'variational_inference': True,
'variational_samples': 1000,
'n_samples': 1000})
Expand Down
Loading

0 comments on commit c1ade7e

Please sign in to comment.