/** @overview
usage: node validate_allstudents [-c] [-w] [-t] [-p] [-a]
flags:
-c : suppress critical issues output
-w : suppress warning issues output
-t : suppress value type issues output
-p : limit to only present day (current) students
-a : limit to only alums (students who graduated or left)
program should be located in grad.houptlab (one level above /jekyll for gradawan (aka gradphile))
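looks for the files:
/jekyll/_data/allstudents.json
/jekyll/_data/dept/department.yaml
/jekyll/_data/schemata/student_schema.yaml
/jekyll/_data/dept/academic_plan_codes.yaml
/jekyll/_data/const/ethnicityCodeOptions.json
/jekyll/_data/univ/residencyCodeOptions.json
/jekyll/_data/dept/degreeOutcomeOptions.json
/jekyll/_data/const/postgradPositionOptions.json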
output file of issues:
/jekyll/_data/issues_allstudents.csv
*/
// ---- command-line flags (documented in the file header) ----
var suppress_critical = (process.argv.indexOf('-c') != -1);
var suppress_warnings = (process.argv.indexOf('-w') != -1);
var suppress_types = (process.argv.indexOf('-t') != -1);
var limit_to_current_students = (process.argv.indexOf('-p') != -1);
var limit_to_alums = (process.argv.indexOf('-a') != -1);

// inclusive range of years accepted when validating dates, terms and years
var min_valid_year = 1900;
var max_valid_year = 2050;

var fs = require('fs');
var yaml = require('js-yaml');
var bc = require('./bc_utilities.js');

// Input file: defaults to the jekyll data file, but can be overridden by the
// first command-line argument that is not a flag. Flags (arguments starting
// with '-') are skipped so that e.g. `node validate_allstudents -c` does not
// try to open a file named "-c".
var allstudents_path = __dirname + '/jekyll/_data/allstudents.json';
var path_arguments = process.argv.slice(2).filter(function (arg) {
    return arg.charAt(0) != '-';
});
if (path_arguments.length > 0) {
    allstudents_path = path_arguments[0];
}
var allstudents = bc.readJSONfromCWD(allstudents_path);

var department = yaml.safeLoad(fs.readFileSync(__dirname + '/jekyll/_data/dept/department.yaml', 'utf8'));
// var areaCodeOptions = require(__dirname+'/jekyll/_data/dept/areaCodeOptions.json');
// var planCodeOptions = require(__dirname+'/jekyll/_data/dept/planCodeOptions.json');

// Option lists are arrays of { value: ... } objects, the shape checkOption expects.
// Both department lists are optional: a missing list becomes an empty option list.
var areaCodeOptions = [];
if (Array.isArray(department.subdivisions)) {
    areaCodeOptions = department.subdivisions.map(function (s) { return { "value": s.abbr }; });
}
var planCodeOptions = [];
if (Array.isArray(department.plan_codes)) {
    planCodeOptions = department.plan_codes.map(function (p) { return { "value": p }; });
}
var ethnicityCodeOptions = bc.readJSONfromAppDir(__dirname,'jekyll/_data/const/ethnicityCodeOptions.json');
var residencyCodeOptions = bc.readJSONfromAppDir(__dirname,'jekyll/_data/univ/residencyCodeOptions.json');
var degreeOutcomeOptions = bc.readJSONfromAppDir(__dirname,'jekyll/_data/dept/degreeOutcomeOptions.json');
var postgradPositionOptions = bc.readJSONfromAppDir(__dirname,'jekyll/_data/const/postgradPositionOptions.json');

// globals so we can call out student if a deep error
var current_fsuid;
var current_name;

var student_schema = yaml.safeLoad(fs.readFileSync(__dirname + '/jekyll/_data/schemata/student_schema.yaml', 'utf8'));
var plan_schema = yaml.safeLoad(fs.readFileSync(__dirname + '/jekyll/_data/dept/academic_plan_codes.yaml', 'utf8'));

// Accumulated CSV records; the first entry is the header row, not an issue.
var issues = [];
issues.push ("ErrorType,fsuid,last_name,first_name,schema,issue, key,value\n")
// console.log(student_schema);

// Object.keys is a static method; `Object(x).keys` is always undefined.
var schema_keys = Object.keys(Object(student_schema));
// the valid plan codes are the top-level property names of the plan schema
var dept_plan_codes = Object.getOwnPropertyNames(plan_schema);
/* ------------------------------------------------------------------------------- */
/**
 * Quote one CSV field when needed.
 * A field containing a comma, a double quote, or a line break is wrapped in
 * double quotes with embedded quotes doubled; any other field is returned
 * unchanged (after conversion to a string).
 */
function csv_escape_field(field) {
    var text = String(field);
    if (/[",\r\n]/.test(text)) {
        return '"' + text.replace(/"/g, '""') + '"';
    }
    return text;
}

/**
 * Build one CSV record describing an issue.
 * Column order: type, student.fsuid, student.last_name, student.first_name,
 * schema, issue, key, value. The record is terminated by "\n".
 * Fields are CSV-quoted where necessary so that a comma inside a name or a
 * message does not shift the remaining columns.
 *
 * @param {string} type    issue class, e.g. "Critical", "Warning", "TypeFormat"
 * @param {object} student object providing fsuid, last_name and first_name
 * @param {string} issue   short description of the problem
 * @param {string} schema  schema the key belongs to ('student' or a plan code)
 * @param {string} key     name of the offending item
 * @param {*}      value   offending value or explanatory text
 * @returns {string} one CSV line
 */
function errortype_issue_key(type,student,issue,schema,key,value) {
    var fields = [
        type, student.fsuid, student.last_name, student.first_name,
        schema, issue, key, value
    ];
    // convert each field explicitly: join() alone would turn undefined/null into ""
    return fields.map(function (field) { return csv_escape_field(field); }).join(",") + "\n";
}
/**
 * Record a critical issue for a student.
 * Issues whose key is "outcome" are additionally echoed to the console
 * (regardless of the -c flag). The CSV record is appended to `issues`
 * unless critical output is suppressed.
 *
 * @param {object} student object providing fsuid, last_name and first_name
 * @param {string} issue   short description of the problem
 * @param {string} schema  schema the key belongs to ('student' or a plan code)
 * @param {string} key     name of the offending item
 * @param {*}      value   offending value or explanatory text
 */
function critical_issue_key(student,issue,schema,key,value) {
    if (key == "outcome") {
        // comma-separate every field, including the value, so the line is readable
        console.log("CRITICAL outcome: " + student.fsuid + "," + student.last_name + ","
            + student.first_name + "," + value);
    }
    if (!suppress_critical) {
        issues.push( errortype_issue_key("Critical",student,issue,schema,key,value));
    }
};
/**
 * Record a warning for a student: appends a "Warning" CSV record to
 * `issues`, or does nothing when warnings are suppressed.
 */
function warning_issue_key(student,issue,schema,key,value) {
    if (suppress_warnings) {
        return;
    }
    var record = errortype_issue_key("Warning",student,issue,schema,key,value);
    issues.push(record);
};
/**
 * Record a value-format problem for a student: appends a "TypeFormat" CSV
 * record to `issues`, or does nothing when type issues are suppressed.
 */
function type_issue_key(student,issue,schema,key,value) {
    if (suppress_types) {
        return;
    }
    var record = errortype_issue_key("TypeFormat",student,issue,schema,key,value);
    issues.push(record);
};
/* ------------------------------------------------------------------------------- */
/**
 * Convert a term code to a month count so that terms can be compared numerically.
 * Terms are coded as "yyyy/t", where t = 1, 6, or 9; the result is
 * year * 12 + t.
 *
 * @param {*} year_term term code, normally a string such as "2015/9"
 * @returns {number} the month count; -1 (after logging the current student)
 *   when the term is undefined or null; NaN when the value cannot be parsed,
 *   which makes every ordering comparison against it false
 */
function term2months(year_term) {
    if (typeof year_term === 'undefined' || null == year_term) {
        console.log("UNDEFINED TERM " + current_name + " " + current_fsuid);
        return -1;
    }
    // The data being validated may hold a non-string here (e.g. a bare number);
    // convert first so that bad data yields NaN instead of crashing the run.
    var term_text = String(year_term);
    var year = parseInt(term_text.substring(0,4),10);
    var term = parseInt(term_text.substring(5),10);
    return year * 12 + term;
}
/* ------------------------------------------------------------------------------- */
/**
 * Report whether `value` is one of the allowed options.
 * `options` is an array of objects with a `value` property; the comparison
 * uses loose equality (==), so the number 1 matches the string "1".
 * `value_type` is accepted for the caller's convenience and is not used.
 *
 * @returns {boolean} true when some option's value equals `value`
 */
function checkOption(options,value,value_type) {
    return options.some(function (option) {
        return value == option.value;
    });
}
/* ------------------------------------------------------------------------------- */
/**
 * Check that `value` is in the correct format for the given `value_type`.
 *
 * Recognized value types:
 *   date            "mm/dd/yyyy" string naming a real calendar day
 *   term            "yyyy/t" string, where t is 1, 6, or 9
 *   yyyy            four-digit year
 *   ECMA-262 date   date string such as "Tue May 12 2020 23:50:21 GMT-0400 (EDT)"
 *   text            a string
 *   boolean         true or false
 *   integer, timestamp, seconds, int_year
 *                   a value that converts to an integer
 *   real, years, fp_year
 *                   a value that converts to a finite number
 *   *.…Options      one of the values in the corresponding option list
 *   collection      always accepted (contents are not validated yet)
 * Years must lie between min_valid_year and max_valid_year (inclusive).
 * Any unrecognized value_type is accepted.
 *
 * @param {string} value_type name of the expected type
 * @param {*}      value      value to check
 * @returns {boolean} true when the value is acceptable
 */
function validate_type(value_type,value) {
    var parts;
    var day;
    var month;
    var year;
    var term;
    var monthLength;
    switch (value_type) {
        case 'date':
            // Validates that the input string is a valid date formatted as "mm/dd/yyyy"
            // http://stackoverflow.com/questions/6177975/how-to-validate-date-with-format-mm-dd-yyyy-in-javascript
            if (typeof value != 'string') { return false; }
            // First check for the pattern
            if (!/^\d{1,2}\/\d{1,2}\/\d{4}$/.test(value)) { return false; }
            // Parse the date parts to integers
            parts = value.split("/");
            month = parseInt(parts[0], 10);
            day = parseInt(parts[1], 10);
            year = parseInt(parts[2], 10);
            // Check the ranges of month and year
            if (year < min_valid_year || year > max_valid_year || month == 0 || month > 12) {
                return false;
            }
            monthLength = [ 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 ];
            // Adjust for leap years
            if (year % 400 == 0 || (year % 100 != 0 && year % 4 == 0)) {
                monthLength[1] = 29;
            }
            // Check the range of the day
            return (day > 0 && day <= monthLength[month - 1]);
        case 'term':
            if (typeof value != 'string') { return false; }
            // First check for the pattern "yyyy/t"
            if (!/^\d{4}\/\d{1}$/.test(value)) { return false; }
            parts = value.split("/");
            year = parseInt(parts[0], 10);
            term = parseInt(parts[1], 10);
            // check for valid year and valid terms
            if (year < min_valid_year || year > max_valid_year ) { return false; }
            return (term == 1 || term == 6 || term == 9);
        case 'yyyy':
            // check for pattern yyyy
            if (!/^\d{4}$/.test(value)) { return false; }
            year = parseInt(value, 10);
            return (year >= min_valid_year && year <= max_valid_year);
        case 'ECMA-262 date':
            // note: the optional comma after the weekday is tolerated by (,*)
            if (!/^^(Sun|Mon|Tue|Wed|Thu|Fri|Sat)(,*)\s(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s[0-9]{1,2}\s[0-9]{4}\s[0-9]{2}:[0-9]{2}:[0-9]{2}\sGMT(-|\+)[0-9]{4}(\s\([a-zA-Z\s]+\))*/.test(value)) {
                console.log("Bad ECMA-262 Date: " + value);
                return false;
            }
            return true;
        case 'text':
            return (typeof value == 'string' || value instanceof String);
        case 'boolean':
            return (typeof value == 'boolean');
        case 'integer':
        case 'timestamp':
        case 'seconds':
        case 'int_year':
            // numeric strings such as "12" are accepted; NaN converts to NaN and is rejected
            return Number.isInteger(Number(value));
        case 'real':
        case 'years':
        case 'fp_year':
            // Number.isNaN(value) does not convert its argument, so it is false for
            // every non-number (e.g. "abc"); convert first, then test the result.
            return Number.isFinite(Number(value));
        case 'data.areaCodeOptions':
            return (checkOption(areaCodeOptions,value,value_type));
        case 'data.planCodeOptions':
            return (checkOption(planCodeOptions,value,value_type) );
        case 'const.ethnicityCodeOptions':
            return (checkOption(ethnicityCodeOptions,value,value_type));
        case 'univ.residencyCodeOptions':
            return (checkOption(residencyCodeOptions,value,value_type));
        case 'dept.degreeOutcomeOptions':
            return (checkOption(degreeOutcomeOptions,value,value_type) );
        case 'const.postgradPositionOptions':
            return (checkOption(postgradPositionOptions,value,value_type));
        case 'collection':
            // NOTE: make this false so we remember to fix it!
            // TODO: validate the contents of collections
            return true;
        default:
            // unknown value types are not checked
            return true;
    }
};
/* ------------------------------------------------------------------------------- */
/**
 * Validate the student-level items that belong to one schema category.
 *
 * Every entry of student_schema whose category equals `category` is examined:
 *   - a value that is undefined, null, or an empty string counts as missing;
 *     a missing required item (required == 'yes') is reported as critical,
 *     any other missing item as a warning (unless warnings are suppressed);
 *   - a value that is present is checked against the entry's value_type and
 *     reported as a type issue when it does not validate.
 *
 * @param {object} student  the student record
 * @param {string} category schema category to check, e.g. 'admin' or 'personal'
 */
function validate_student_category(student, category) {
    student_schema.forEach(function (entry) {
        if (entry.category != category) {
            return; // belongs to another category
        }
        var field = student[entry.key];
        var is_missing = (typeof field == 'undefined' || null == field
            || (typeof field == 'string' && 0 == field.length));
        if (!is_missing) {
            // value is present: check its format
            if (!validate_type(entry.value_type,field)) {
                type_issue_key(student,'invalid ' + entry.value_type,'student',entry.key,field);
            }
            return;
        }
        if (entry.required == 'yes') {
            critical_issue_key(student,'missing','student',entry.key,"????");
        }
        else if (!suppress_warnings) {
            warning_issue_key(student,'missing','student',entry.key,"????");
        }
    });
};
/* ------------------------------------------------------------------------------- */
/**
 * Check one committee member record (chair, co-chair, member, univ rep).
 * Nothing is checked when `value` is undefined or null. Otherwise a field is
 * considered missing when it is not an own property, is null/undefined, or
 * has length 0: a missing fsuid is critical, a missing name or email is a
 * warning. Issues are filed under plan.plan_code with key "<key>.<field>".
 *
 * @param {object} student the student record
 * @param {object} plan    the academic plan; only plan.plan_code is read
 * @param {string} key     label of the committee slot, e.g. 'chair'
 * @param {*}      value   the committee member record, if any
 */
function check_committee_member(student,plan,key, value) {
    if (typeof value == 'undefined' || value == null) {
        return;
    }
    var is_blank = function (field) {
        if (!Object(value).hasOwnProperty(field)) {
            return true;
        }
        var entry = value[field];
        return entry == null || entry.length == 0;
    };
    if (is_blank('fsuid')) {
        critical_issue_key(student,'missing',plan.plan_code,key+".fsuid","????");
    }
    ['name', 'email'].forEach(function (field) {
        if (is_blank(field)) {
            warning_issue_key(student,'missing',plan.plan_code,key+"."+field,"????");
        }
    });
};
/* ------------------------------------------------------------------------------- */
/**
 * Report whether `plan_code` is one of the department's plan codes
 * (an exact, strict-equality match against dept_plan_codes).
 */
function validate_plan_code(plan_code) {
    return dept_plan_codes.some(function (known_code) {
        return known_code === plan_code;
    });
};
/* ------------------------------------------------------------------------------- */
/**
 * Validate one list of schema items (requirements or info) against a plan.
 * A value that is undefined, null, or an empty string counts as missing:
 * required items (required == 'yes') are reported as critical, other items
 * as warnings (unless warnings are suppressed). A value that is present is
 * checked against the item's value_type.
 *
 * @param {object}  student      the student record
 * @param {object}  plan         the student's data for this plan
 * @param {string}  plan_code    the plan's code, used as the issue's schema
 * @param {Array}   items        schema items ({ key, required, value_type, category });
 *                               a missing list is treated as empty
 * @param {boolean} skip_derived when true, items of category 'derived' are ignored
 */
function validate_plan_items(student, plan, plan_code, items, skip_derived) {
    (items || []).forEach(function (item) {
        if (skip_derived && item.category == 'derived') {
            return;
        }
        var value = plan[item.key];
        var is_missing = (typeof value == 'undefined' || null == value
            || (typeof value == 'string' && 0 == value.length));
        if (is_missing) {
            if (item.required == 'yes') {
                critical_issue_key(student,'missing',plan_code,item.key,"????");
            }
            else if (!suppress_warnings) {
                warning_issue_key(student,'missing',plan_code,item.key,"????");
            }
        }
        else if (!validate_type(item.value_type,value)) {
            type_issue_key(student,'invalid ' + item.value_type,plan_code,item.key,value);
        }
    });
}

/**
 * Add critical/warning/type issues for this student's academic plans.
 *
 *  - a student without an academic_plans object gets a critical issue;
 *  - every plan must have a valid plan_code (eg BIOLOGYPD);
 *  - every plan is checked against the 'requirements' and 'info' items of the
 *    plan_schema entry for its plan_code; "derived" info items are ignored
 *    because they are calculated at build time;
 *  - adhoc checks make sure the plan's admit_term/last_term are consistent
 *    with each other and with the student's admit_term/last_term, and that a
 *    finished plan has an outcome (and a prelim date when the plan has prelims);
 *  - chairs, committee members and the university rep are checked when present
 *    (for older students we don't have cmte data yet).
 *
 * Also records the student in current_fsuid/current_name so that deeper
 * helpers can name the student in their log output.
 *
 * @param {object} student the student record
 */
function validate_student_plans(student) {
    current_fsuid = student.fsuid;
    current_name = student.last_name;
    if (typeof student.academic_plans === 'undefined' || student.academic_plans == null) {
        critical_issue_key(student,'missing','student',"academic_plans","????");
        return;
    }
    // forEach gives each plan its own scope: the item loops can no longer
    // clobber the plan index, which previously caused later plans to be skipped
    Object.keys(student.academic_plans).forEach(function (plan_code) {
        if (!validate_plan_code(plan_code)) {
            critical_issue_key(student,'invalid plan code',plan_code,"plan_code",plan_code);
            return;
        }
        // SCHEMA VALIDATIONS
        var plan = student.academic_plans[plan_code];
        var plan_scheme = plan_schema[plan_code];
        validate_plan_items(student, plan, plan_code, plan_scheme['requirements'], false);
        validate_plan_items(student, plan, plan_code, plan_scheme['info'], true);

        // ADHOC VALIDATIONS
        // TERMS
        // 1. compare last_term to admit_term, and both to student.admit_term and student.last_term
        // 2. if last_term, the plan must have an outcome
        // 3. if last_term and the plan scheme has prelims, make sure there is a prelim date
        var student_has_last_term = (typeof student['last_term'] != 'undefined' && null != student['last_term']);
        if (term2months(plan['admit_term']) < term2months(student['admit_term']) ) {
            critical_issue_key(student,'plan admit_term < student admit_term',plan_code,'admit_term',plan['admit_term'] + " < " + student['admit_term']);
        }
        if (student_has_last_term) {
            if (term2months(plan['admit_term']) > term2months(student['last_term']) ) {
                critical_issue_key(student,'plan admit_term > student last_term',plan_code,'admit_term',plan['admit_term'] + " > " + student['last_term']);
            }
        }
        if (typeof plan['last_term'] != 'undefined' && null != plan['last_term'] ) {
            if (plan_scheme['has_prelim'] == true) {
                if (typeof plan['prelim_exam_passed_date'] == 'undefined' || null == plan['prelim_exam_passed_date'] ) {
                    critical_issue_key(student,'missing',plan_code,'prelim_exam_passed_date',"????");
                }
            }
            // NOTE(review): the +4 only flags a last_term that is at least 4 months
            // before the admit term — presumably a deliberate tolerance; confirm.
            if (term2months(plan['last_term'])+4 <= term2months(plan['admit_term']) ) {
                critical_issue_key(student,'last_term < plan admit_term',plan_code,'last_term',plan['last_term'] + " < " + plan['admit_term']);
            }
            if (term2months(plan['last_term'])+4 <= term2months(student['admit_term']) ) {
                // report the student's admit_term, which is what was compared
                critical_issue_key(student,'last_term < student admit_term',plan_code,'last_term',plan['last_term'] + " < " + student['admit_term']);
            }
            if (student_has_last_term) {
                if (term2months(plan['last_term']) > term2months(student['last_term']) ) {
                    critical_issue_key(student,'plan last_term > student last_term',plan_code,'last_term',plan['last_term'] + " > " + student['last_term']);
                }
            }
            if (typeof plan['outcome'] == 'undefined' || null == plan['outcome'] ) {
                critical_issue_key(student,'missing',plan_code,'outcome',"????");
            }
        }

        // if they exist, check chairs and members and univ_rep for name, fsuid, and emails
        check_committee_member(student,plan,'chair',plan.chair);
        check_committee_member(student,plan,'co_chair1',plan.co_chair1);
        check_committee_member(student,plan,'co_chair2',plan.co_chair2);
        check_committee_member(student,plan,'univ_rep', plan.univ_rep);
        // a null members list is skipped just like an undefined one
        if (typeof plan.members != 'undefined' && plan.members != null) {
            // only the first four member slots are checked
            for (var slot = 0; slot < 4; slot++) {
                check_committee_member(student,plan,'member[' + slot + ']',plan.members[slot]);
            }
        }
    });
}; // validate_student_plans
/* ------------------------------------------------------------------------------- */
/**
 * Check that the student's last_term agrees with the last_terms of their plans.
 *
 * The student's last_term only counts when it is present and a valid term.
 *  - Student without last_term: consistent unless every plan has a last_term,
 *    in which case the student's last_term is reported as missing.
 *  - Student with last_term: a plan lacking last_term is reported; the
 *    student's last_term must equal the last_term of at least one plan and
 *    must not be earlier than any plan's last_term; if both the student and
 *    the matching plan have an outcome, the outcomes must be equal.
 * A student with a last_term but no academic plans is reported as a mismatch.
 *
 * @param {object} student the student record
 * @returns {boolean} false when a missing/mismatched last_term or outcome was
 *   reported and checking stopped; true otherwise
 */
function validate_student_last_term_consistency(student) {
    var has_last_term = (typeof student.last_term != 'undefined' && student.last_term != null && validate_type('term',student.last_term));
    var has_plans = !(typeof student.academic_plans === 'undefined' || student.academic_plans == null);
    // an absent academic_plans object is treated as zero plans
    var plan_keys = has_plans ? Object.keys(student.academic_plans) : [];

    // collect { term, months, outcome } for every plan that has a last_term
    var last_terms = [];
    plan_keys.forEach(function (plan_code) {
        var plan = student.academic_plans[plan_code];
        if (typeof plan['last_term'] != 'undefined' && plan['last_term'] != null) {
            var has_outcome = (typeof plan['outcome'] != 'undefined' && plan['outcome'] != null);
            last_terms.push({
                term: plan['last_term'],
                months: term2months(plan['last_term']),
                outcome: has_outcome ? plan['outcome'] : null
            });
        }
    });

    if (!has_last_term) {
        if (last_terms.length == 0) { return true; }
        // every plan is finished, so the student should have a last_term too
        if (last_terms.length == plan_keys.length) {
            critical_issue_key(student,'missing','student','last_term',"last term missing but has at least one plan[last_term]");
            return false;
        }
        return true;
    }

    if (last_terms.length != plan_keys.length) {
        critical_issue_key(student,'missing','student','last_term',"student has last term, but at least one plan is missing its last_term");
    }
    var student_last_months = term2months(student.last_term);
    var max_months = 0;
    var matches_plan = -1; // index of the last plan whose last_term equals the student's
    for (var i = 0; i < last_terms.length; i++) {
        if (student_last_months == last_terms[i].months) {
            matches_plan = i;
        }
        if (last_terms[i].months > max_months) {
            max_months = last_terms[i].months;
        }
    }
    if (matches_plan < 0) {
        critical_issue_key(student,'mismatch ','student','last_term',"last term doesn't match any one plan last_terms");
        return false;
    }
    if (student_last_months < max_months) {
        critical_issue_key(student,'mismatch ','student','last_term',"last term earlier than at least one plan last_terms");
        return false;
    }
    if (typeof student.outcome != 'undefined' && student.outcome != null && last_terms[matches_plan].outcome != null) {
        if (student.outcome != last_terms[matches_plan].outcome) {
            critical_issue_key(student,'mismatch ','student','outcome',"student outcome doesn't match plan outcome");
            return false;
        }
    }
    return true;
}; // validate_student_last_term_consistency
/* ------------------------------------------------------------------------------- */
/**
 * Compare each plan's terms with the values imported from Judy Bowers' old files.
 * For every academic plan:
 *   - if admit_bowers is present it must equal admit_term;
 *   - if both last_term and complete_bowers are present they must be equal.
 * A mismatch is reported as critical so that someone can go back and verify
 * which term is correct. Students without academic plans are skipped.
 *
 * @param {object} student the student record
 */
function validate_bowers_fields(student) {
    var plans = student.academic_plans;
    if (typeof plans === 'undefined' || plans == null) {
        return;
    }
    Object.keys(plans).forEach(function (plan_code) {
        var plan = plans[plan_code];

        var bowers_admit = plan['admit_bowers'];
        // `x != null` is false for both null and undefined
        if (bowers_admit != null && bowers_admit != plan['admit_term']) {
            critical_issue_key(student,'mismatch',plan_code,'admit_bowers',"admit_bowers doesn't match admit_term");
        }

        var plan_last_term = plan['last_term'];
        if (plan_last_term == null) {
            return; // plan not finished: nothing to compare
        }
        var bowers_complete = plan['complete_bowers'];
        if (bowers_complete != null && bowers_complete != plan_last_term) {
            critical_issue_key(student,'mismatch',plan_code,'complete_bowers',"complete_bowers doesn't match last_term");
        }
    });
}; // validate_bowers_fields
/* ------------------------------------------------------------------------------- */
// ---- main: validate every selected student, then write the issues file ----
for (var i = 0; i < allstudents.length; i++) {
    // a student without a last_term is a current (present-day) student
    var current_student = (typeof allstudents[i].last_term == 'undefined' || allstudents[i].last_term == null);
    var check_student = true;
    if (!current_student && limit_to_current_students) { check_student = false; }
    if (current_student && limit_to_alums) { check_student = false; }
    if (check_student) {
        validate_student_category(allstudents[i],'admin');
        validate_student_category(allstudents[i],'personal');
        validate_student_category(allstudents[i],'academic');
        validate_student_plans(allstudents[i]);
        validate_student_last_term_consistency(allstudents[i]);
        validate_bowers_fields(allstudents[i]);
    }
} // next student

// the first entry of `issues` is the CSV header row, not an issue
console.log("Found " + (issues.length - 1) + " issues");

var issues_path = __dirname + '/jekyll/_data/issues_allstudents.csv';
var file = fs.createWriteStream(issues_path);
file.on('error', function(err) {
    // report the failure instead of silently producing no (or a partial) file
    console.error("Could not write " + issues_path + ": " + err.message);
    process.exitCode = 1;
});
issues.forEach(function(line) { file.write(line); });
file.end();