Source: validate_allstudents.js



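/** @overview

    usage: node validate_allstudents [allstudents_path] [-c] [-w] [-t] [-p] [-a]

	arguments:
		allstudents_path : optional; if given it must be the first argument.
		                   replaces the default /jekyll/_data/allstudents.json

	flags:
		-c : suppress critical issues output
		-w : suppress warning issues output
		-t : suppress value type issues output
		-p : limit to only present day (current) students
		-a : limit to only alums (students who graduated or left)


	program should be located in grad.houptlab (one level above /jekyll for gradawan (aka gradphile))

	looks for the files:

				/jekyll/_data/allstudents.json
				/jekyll/_data/dept/department.yaml
				/jekyll/_data/schemata/student_schema.yaml
				/jekyll/_data/dept/academic_plan_codes.yaml
				/jekyll/_data/const/ethnicityCodeOptions.json
				/jekyll/_data/univ/residencyCodeOptions.json
				/jekyll/_data/dept/degreeOutcomeOptions.json
				/jekyll/_data/const/postgradPositionOptions.json

	output file of issues:

				/jekyll/_data/issues_allstudents.csv

*/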


// Command-line switches. A switch is "on" when its flag appears anywhere in process.argv.
function argv_has_flag(flag) {
	return process.argv.indexOf(flag) != -1;
}

var suppress_critical = argv_has_flag('-c');          // -c : no critical rows in the output
var suppress_warnings = argv_has_flag('-w');          // -w : no warning rows in the output
var suppress_types = argv_has_flag('-t');             // -t : no type/format rows in the output
var limit_to_current_students = argv_has_flag('-p');  // -p : only students without a last_term
var limit_to_alums = argv_has_flag('-a');             // -a : only students with a last_term


// Inclusive range of years accepted when validating dates, terms and years.
var min_valid_year = 1900;
var max_valid_year = 2050;

var fs = require('fs');
var yaml = require('js-yaml');
var bc = require('./bc_utilities.js');

// Path of the student data file. Defaults to the copy under /jekyll/_data, but can be
// replaced by the first command-line argument that is not one of the option flags.
// (Previously argv[2] was used unconditionally, so running with only flags, e.g.
// "node validate_allstudents -c", tried to read a file named "-c".)
var allstudents_path = __dirname + '/jekyll/_data/allstudents.json';
var option_flags = ['-c', '-w', '-t', '-p', '-a'];
var path_arguments = process.argv.slice(2).filter(function (arg) {
	return option_flags.indexOf(arg) == -1;
});
if (path_arguments.length > 0) {
	allstudents_path = path_arguments[0];
}
var allstudents = bc.readJSONfromCWD(allstudents_path);


// Read and parse one YAML file stored under /jekyll/_data (path relative to that folder).
// NOTE(review): yaml.safeLoad is the js-yaml 3.x API; it appears to have been removed in
// js-yaml 4 in favour of yaml.load -- confirm against the installed version.
function load_yaml_data(relative_path) {
	return yaml.safeLoad(fs.readFileSync(__dirname + '/jekyll/_data/' + relative_path, 'utf8'));
}

var department = load_yaml_data('dept/department.yaml');

// Option lists are arrays of { "value": ... } objects, the shape checkOption() expects.
// Area and plan codes are derived from department.yaml; a missing list yields no options
// (so every value of that type is reported as invalid) rather than a crash at startup.
var areaCodeOptions = [];
if (department != null && Array.isArray(department.subdivisions)) {
	areaCodeOptions = department.subdivisions.map(function (s) { return { "value": s.abbr }; });
}
var planCodeOptions = [];
if (department != null && Array.isArray(department.plan_codes)) {
	planCodeOptions = department.plan_codes.map(function (p) { return { "value": p }; });
}
var ethnicityCodeOptions = bc.readJSONfromAppDir(__dirname,'jekyll/_data/const/ethnicityCodeOptions.json');
var residencyCodeOptions = bc.readJSONfromAppDir(__dirname,'jekyll/_data/univ/residencyCodeOptions.json');
var degreeOutcomeOptions = bc.readJSONfromAppDir(__dirname,'jekyll/_data/dept/degreeOutcomeOptions.json');
var postgradPositionOptions = bc.readJSONfromAppDir(__dirname,'jekyll/_data/const/postgradPositionOptions.json');

// globals so we can call out student if a deep error
var current_fsuid;
var current_name;

// list of student-level items (key, category, required, value_type)
var student_schema = load_yaml_data('schemata/student_schema.yaml');

// per-plan schemata, keyed by plan code
var plan_schema = load_yaml_data('dept/academic_plan_codes.yaml');


// Rows of the output CSV, in the order they are found. The first row is the column header.
var issues = [];

issues.push("ErrorType,fsuid,last_name,first_name,schema,issue, key,value\n");

// Own enumerable keys of the student schema. (The previous expression,
// Object(student_schema).keys, read a non-existent property and was always undefined.)
var schema_keys = (student_schema != null) ? Object.keys(student_schema) : [];

// Plan codes known to the department: the top-level property names of the plan schema file.
var dept_plan_codes = Object.getOwnPropertyNames(plan_schema);

/* ------------------------------------------------------------------------------- */

/**
 * Render one value as a CSV field.
 * Fields containing a comma, double quote or line break are wrapped in double quotes
 * with embedded quotes doubled; all other fields are emitted unchanged.
 */
function csv_field(value) {
	var text = String(value);
	if (/[",\r\n]/.test(text)) {
		return '"' + text.replace(/"/g, '""') + '"';
	}
	return text;
}

/**
 * Build one row of the issues CSV.
 *
 * Column order matches the header row:
 * ErrorType, fsuid, last_name, first_name, schema, issue, key, value.
 *
 * @param type    label for the ErrorType column (e.g. "Critical")
 * @param student student record; fsuid, last_name and first_name are read from it
 * @param issue   short description of the problem
 * @param schema  schema the key belongs to ('student' or a plan code)
 * @param key     name of the offending item
 * @param value   offending value, or a placeholder/message
 * @returns the row as a string terminated by "\n"
 */
function errortype_issue_key(type,student,issue,schema,key,value) {
	var fields = [type, student.fsuid, student.last_name, student.first_name, schema, issue, key, value];
	// quote fields when needed so a comma inside a name or message cannot shift the columns
	return fields.map(csv_field).join(",") + "\n";
}

/**
 * Record a critical issue for a student, unless critical output is suppressed (-c).
 * Issues on the "outcome" key are also echoed to the console, whether or not
 * critical output is suppressed.
 *
 * @param student student record (fsuid, last_name, first_name are read)
 * @param issue   short description of the problem
 * @param schema  'student' or the plan code the key belongs to
 * @param key     name of the offending item
 * @param value   offending value, or a placeholder/message
 */
function critical_issue_key(student,issue,schema,key,value) {
	if (key == "outcome") {
		// a separator is now written before the value; it used to be glued to first_name
		console.log("CRITICAL outcome: " + student.fsuid + "," + student.last_name + ","
						+ student.first_name + "," + value);
	}

	if (!suppress_critical) {
		issues.push( errortype_issue_key("Critical",student,issue,schema,key,value));
	}
}

/**
 * Record a warning for a student; does nothing when warnings are suppressed (-w).
 * Arguments are passed straight through to errortype_issue_key with type "Warning".
 */
function warning_issue_key(student,issue,schema,key,value) {
	if (suppress_warnings) {
		return;
	}
	var row = errortype_issue_key("Warning",student,issue,schema,key,value);
	issues.push(row);
}

/**
 * Record a value type/format issue for a student; does nothing when type issues
 * are suppressed (-t). The row is built by errortype_issue_key with type "TypeFormat".
 */
function type_issue_key(student,issue,schema,key,value) {
	if (suppress_types) {
		return;
	}
	var row = errortype_issue_key("TypeFormat",student,issue,schema,key,value);
	issues.push(row);
}

/* ------------------------------------------------------------------------------- */

/**
 * Convert a term code to a month count so that terms can be compared numerically.
 *
 * Terms are coded as "yyyy/t", where t = 1, 6, or 9; the result is year * 12 + t.
 *
 * @param year_term term string such as "2015/9"
 * @returns the month count; -1 when year_term is undefined, null or not a string
 *          (the current student is named on the console in that case); NaN when the
 *          string does not start with a number
 */
function term2months(year_term) {

	if (typeof year_term === 'undefined' || null == year_term) {
		console.log("UNDEFINED TERM " + current_name + " " + current_fsuid);
		return -1;
	}

	// a number or object here used to throw on .substring and abort the whole run;
	// treat it like a missing term instead (the type check reports the bad value)
	if (typeof year_term !== 'string' && !(year_term instanceof String)) {
		console.log("INVALID TERM " + year_term + " " + current_name + " " + current_fsuid);
		return -1;
	}

	var year = parseInt(year_term.substring(0,4),10);
	var term = parseInt(year_term.substring(5),10);
	return year * 12 + term;

}

/* ------------------------------------------------------------------------------- */

/**
 * Report whether value equals (loosely, with ==) the "value" property of any entry
 * in options.
 *
 * @param options    list of objects carrying a "value" property
 * @param value      value to look up
 * @param value_type name of the option list; currently unused
 * @returns true on the first match, false when no entry matches
 */
function checkOption(options,value,value_type) {

	var position = 0;

	while (position < options.length) {
		var candidate = options[position];
		if (value == candidate.value) {
			return true;
		}
		position += 1;
	}

	return false;

}

/* ------------------------------------------------------------------------------- */

/** True when year lies within [min_valid_year, max_valid_year]. */
function year_is_in_valid_range(year) {
	return (year >= min_valid_year && year <= max_valid_year);
}

/**
 * True when value is a string holding a real calendar date formatted as "mm/dd/yyyy".
 * http://stackoverflow.com/questions/6177975/how-to-validate-date-with-format-mm-dd-yyyy-in-javascript
 */
function is_valid_date_string(value) {
	if (typeof value != 'string') { return false; }

	// First check for the pattern
	if (!/^\d{1,2}\/\d{1,2}\/\d{4}$/.test(value)) { return false; }

	// Parse the date parts to integers
	var parts = value.split("/");
	var month = parseInt(parts[0], 10);
	var day = parseInt(parts[1], 10);
	var year = parseInt(parts[2], 10);

	// Check the ranges of month and year
	if (!year_is_in_valid_range(year) || month == 0 || month > 12) { return false; }

	var monthLength = [ 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 ];

	// Adjust for leap years
	if (year % 400 == 0 || (year % 100 != 0 && year % 4 == 0)) {
		monthLength[1] = 29;
	}

	// Check the range of the day
	return (day > 0 && day <= monthLength[month - 1]);
}

/** True when value is a string of the form "yyyy/t" with a valid year and t = 1, 6 or 9. */
function is_valid_term_string(value) {
	if (typeof value != 'string') { return false; }
	if (!/^\d{4}\/\d{1}$/.test(value)) { return false; }

	var parts = value.split("/");
	var year = parseInt(parts[0], 10);
	var term = parseInt(parts[1], 10);

	if (!year_is_in_valid_range(year)) { return false; }
	return (term == 1 || term == 6 || term == 9);
}

/**
 * True when value is a finite number, or a non-blank string that converts to one.
 * Booleans, arrays, objects and non-numeric text are rejected.
 */
function is_numeric_value(value) {
	if (typeof value == 'number') { return Number.isFinite(value); }
	if (typeof value == 'string') {
		return (value.trim().length > 0 && Number.isFinite(Number(value)));
	}
	return false;
}

/**
 * Check that value is in the correct format for the given value_type.
 *
 * value_type can be:
 *   date: "mm/dd/yyyy"
 *   term: "yyyy/t", where t = 1, 6, or 9
 *   yyyy: four-digit year within the valid year range
 *   ECMA-262 date: date string such as "Mon Jan 01 2018 00:00:00 GMT-0500 (EST)"
 *   text: a string
 *   boolean
 *   integer, int_year
 *   timestamp: unix time stamp in seconds since 1970 (integer)
 *   seconds: a duration in seconds (integer)
 *   real, fp_year: floating point number
 *   years: duration in years (floating point number)
 *   one of a number of Options (e.g. planCodeOptions, ethnicityCodeOptions)
 *   collection: always accepted for now
 *
 * @param value_type type name taken from the schema
 * @param value      value to check
 * @returns true when the value is acceptable; unknown value_types are accepted
 */
function validate_type(value_type,value) {

	switch (value_type) {

		case 'date':
			return is_valid_date_string(value);

		case 'term':
			return is_valid_term_string(value);

		case 'yyyy':
			// check for pattern yyyy, then the year range
			return (/^\d{4}$/.test(value) && year_is_in_valid_range(parseInt(value, 10)));

		case 'ECMA-262 date':
			// note (original author): (,*) included so that toGMTString is recognized
			if (!/^^(Sun|Mon|Tue|Wed|Thu|Fri|Sat)(,*)\s(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s[0-9]{1,2}\s[0-9]{4}\s[0-9]{2}:[0-9]{2}:[0-9]{2}\sGMT(-|\+)[0-9]{4}(\s\([a-zA-Z\s]+\))*/.test(value)) {
				console.log("Bad ECMA-262 Date: " + value);
				return false;
			}
			return true;

		case 'text':
			return (typeof value == 'string' || value instanceof String);

		case 'boolean':
			return (typeof value == 'boolean');

		case 'integer':
		case 'timestamp':
		case 'seconds':
		case 'int_year':
			// numbers and numeric strings only; Number(true) or Number([]) no longer pass
			return (is_numeric_value(value) && Number.isInteger(Number(value)));

		case 'real':
		case 'years':
		case 'fp_year':
			// Number.isNaN(value) is false for every non-number, so the old test
			// "!Number.isNaN(value)" accepted arbitrary text; require a real number
			return is_numeric_value(value);

		case 'data.areaCodeOptions':
			return checkOption(areaCodeOptions,value,value_type);
		case 'data.planCodeOptions':
			return checkOption(planCodeOptions,value,value_type);
		case 'const.ethnicityCodeOptions':
			return checkOption(ethnicityCodeOptions,value,value_type);
		case 'univ.residencyCodeOptions':
			return checkOption(residencyCodeOptions,value,value_type);
		case 'dept.degreeOutcomeOptions':
			return checkOption(degreeOutcomeOptions,value,value_type);
		case 'const.postgradPositionOptions':
			return checkOption(postgradPositionOptions,value,value_type);

		case 'collection':
			// NOTE: make this false so we remember to fix it!
			// TODO: fix it
			return true;

		default:
			return true;
	}

}

/* ------------------------------------------------------------------------------- */

/**
 * Validate every student_schema item of one category against a student record,
 * adding rows to the issues array through the *_issue_key helpers.
 *
 * Schema items belong to categories such as deprecated, admin, admissions,
 * personal, academic, postgrad. For each item in the requested category:
 *   - absent value (undefined, null or empty string): critical if the item is
 *     required ('yes'), otherwise a warning (unless warnings are suppressed);
 *   - present value: its format is checked against the item's value_type.
 *
 * @param student  student record to check
 * @param category schema category to validate
 */
function validate_student_category(student, category) {

	for (var idx = 0; idx < student_schema.length; idx += 1) {

		var entry = student_schema[idx];

		// only items of the requested category are considered
		if (entry.category != category) {
			continue;
		}

		var field_value = student[entry.key];
		var is_absent = (typeof field_value == 'undefined')
			|| (null == field_value)
			|| (typeof field_value == 'string' && 0 == field_value.length);

		if (is_absent) {
			if (entry.required == 'yes') {
				critical_issue_key(student,'missing','student',entry.key,"????");
			}
			else if (!suppress_warnings) {
				warning_issue_key(student,'missing','student',entry.key,"????");
			}
			continue;
		}

		// value is present: check its format
		if (!validate_type(entry.value_type,field_value)) {
			type_issue_key(student,'invalid ' + entry.value_type,'student',entry.key,field_value);
		}

	}

}

/* ------------------------------------------------------------------------------- */

/**
 * Check one committee member entry of a plan for its identifying fields.
 *
 * Nothing is reported when the entry itself is undefined or null. Otherwise a
 * field counts as missing when it is not an own property of the entry, is null
 * or undefined, or has length 0: a missing fsuid is critical, a missing name or
 * email is a warning. Issues are filed under plan.plan_code with key "<key>.<field>".
 *
 * @param student student record the plan belongs to
 * @param plan    academic plan; plan.plan_code is used as the schema column
 * @param key     label of the committee role (e.g. 'chair', 'member[0]')
 * @param value   the committee member entry to check
 */
function check_committee_member(student,plan,key, value) {

	if (typeof value == 'undefined' || value == null) {
		return;
	}

	var lacks = function (field) {
		return !Object(value).hasOwnProperty(field)
			|| value[field] == null
			|| value[field].length == 0;
	};

	if (lacks('fsuid')) {
		critical_issue_key(student,'missing',plan.plan_code,key+".fsuid","????");
	}
	if (lacks('name')) {
		warning_issue_key(student,'missing',plan.plan_code,key+".name","????");
	}
	if (lacks('email')) {
		warning_issue_key(student,'missing',plan.plan_code,key+".email","????");
	}

}

/* ------------------------------------------------------------------------------- */

/**
 * Report whether plan_code is one of the department's known plan codes
 * (the entries of dept_plan_codes).
 */
function validate_plan_code(plan_code) {

	var position = dept_plan_codes.indexOf(plan_code);
	return position >= 0;

}

/* ------------------------------------------------------------------------------- */


/** True when a plan value is absent: undefined, null, or an empty string. */
function plan_value_is_missing(value) {
	return (typeof value == 'undefined' || null == value || (typeof value == 'string' && 0 == value.length));
}

/**
 * Check one list of schema items (requirements or info) against a plan.
 * Missing required items are critical, missing optional items are warnings,
 * and present items have their format checked against item.value_type.
 *
 * @param skip_derived when true, items whose category is 'derived' are ignored
 */
function validate_plan_items(student, plan, plan_code, items, skip_derived) {

	if (items == null) { return; }

	for (var k = 0; k < items.length; k++) {

		var item = items[k];

		if (skip_derived && item.category == 'derived') { continue; }

		if (plan_value_is_missing(plan[item.key])) {
			if (item.required == 'yes') {
				critical_issue_key(student,'missing',plan_code,item.key,"????");
			}
			else {
				// warning_issue_key itself honours the -w flag
				warning_issue_key(student,'missing',plan_code,item.key,"????");
			}
		}
		else if (!validate_type(item.value_type,plan[item.key])) {
			type_issue_key(student,'invalid ' + item.value_type,plan_code,item.key,plan[item.key]);
		}

	}
}

/**
 * Ad hoc consistency checks between the terms of a plan and those of the student:
 * the plan must not start before the student was admitted or after the student
 * left, and a finished plan (one with a last_term) must have a prelim date when
 * the plan schema has prelims, must not end before it or the student started,
 * must not end after the student's last term, and must have an outcome.
 */
function validate_plan_terms(student, plan, plan_code, plan_scheme) {

	var student_has_last_term = (student['last_term'] != null);

	if (term2months(plan['admit_term']) < term2months(student['admit_term']) ) {
		// the item at fault is the plan's admit_term (this used to be reported under 'last_term')
		critical_issue_key(student,'plan admit_term < student admit_term',plan_code,'admit_term',plan['admit_term'] + " < " + student['admit_term']);
	}

	if (student_has_last_term) {
		if (term2months(plan['admit_term']) > term2months(student['last_term']) ) {
			critical_issue_key(student,'plan admit_term > student last_term',plan_code,'admit_term',plan['admit_term'] + " > " + student['last_term']);
		}
	}

	if (plan['last_term'] == null) { return; }

	if (plan_scheme['has_prelim'] == true) {
		if (plan['prelim_exam_passed_date'] == null) {
			critical_issue_key(student,'missing',plan_code,'prelim_exam_passed_date',"????");
		}
	}

	// the +4 slack is kept from the original check
	if (term2months(plan['last_term'])+4 <= term2months(plan['admit_term']) ) {
		critical_issue_key(student,'last_term < plan admit_term',plan_code,'last_term',plan['last_term'] + " < " + plan['admit_term']);
	}

	if (term2months(plan['last_term'])+4 <= term2months(student['admit_term']) ) {
		// show the term actually compared against: the student's admit_term
		critical_issue_key(student,'last_term < student admit_term',plan_code,'last_term',plan['last_term'] + " < " + student['admit_term']);
	}

	if (student_has_last_term) {
		if (term2months(plan['last_term']) > term2months(student['last_term']) ) {
			critical_issue_key(student,'plan last_term > student last_term',plan_code,'last_term',plan['last_term'] + " > " + student['last_term']);
		}
	}

	if (plan['outcome'] == null) {
		critical_issue_key(student,'missing',plan_code,'outcome',"????");
	}
}

/**
 * If they exist, check chairs, university rep and the first four members of a
 * plan's committee for fsuid, name and email.
 */
function validate_plan_committee(student, plan) {

	check_committee_member(student,plan,'chair',plan.chair);
	check_committee_member(student,plan,'co_chair1',plan.co_chair1);
	check_committee_member(student,plan,'co_chair2',plan.co_chair2);
	check_committee_member(student,plan,'univ_rep', plan.univ_rep);

	// members may be absent (older students have no committee data) or null
	if (plan.members != null) {
		for (var m = 0; m < 4; m++) {
			check_committee_member(student,plan,'member[' + m + ']',plan.members[m]);
		}
	}
}

/**
 * Add critical/warning/type rows to the issues array for a student's academic plans.
 *
 * - the student needs an academic_plans object (critical when absent)
 * - every plan key must be a valid plan code (eg BIOLOGYPD)
 * - each plan is checked against the 'requirements' and 'info' items of the
 *   plan_schema entry for its plan code; "derived" info items are ignored
 *   because they are calculated at build time
 * - ad hoc term checks (eg last_term falls after admit_term)
 * - committee chairs and members, when present
 *
 * Also records the student in the current_fsuid / current_name globals so that
 * deeper helpers can name the student in console messages.
 *
 * @param student student record to check
 */
function validate_student_plans(student) {

	current_fsuid = student.fsuid;
	current_name = student.last_name;

	if (typeof student.academic_plans === 'undefined' || student.academic_plans == null) {
		critical_issue_key(student,'missing','student',"academic_plans","????");
		return;
	}

	var plan_keys = Object.keys(student.academic_plans);

	// the loop index is used by this loop only; the item loops live in helpers with
	// their own counters (they used to share "i", which skipped plans after the first)
	for (var p = 0; p < plan_keys.length; p++ ) {

		var plan_code = plan_keys[p];

		if (!validate_plan_code(plan_code)) {
			critical_issue_key(student,'invalid plan code',plan_code,"plan_code",plan_code);
			continue;
		}

		var plan = student.academic_plans[plan_code];
		var plan_scheme = plan_schema[plan_code];

		// SCHEMA VALIDATIONS
		validate_plan_items(student, plan, plan_code, plan_scheme['requirements'], false);
		validate_plan_items(student, plan, plan_code, plan_scheme['info'], true);

		// ADHOC VALIDATIONS
		validate_plan_terms(student, plan, plan_code, plan_scheme);

		validate_plan_committee(student, plan);

	} // next plan_code

}

/* ------------------------------------------------------------------------------- */

/**
 * Check that the student's last_term (and outcome) agree with the last_terms
 * of the student's academic plans.
 *
 * Critical issues are recorded when:
 *   - the student has no valid last_term but every plan has one;
 *   - the student has a last_term but at least one plan does not;
 *   - the student's last_term matches no plan's last_term;
 *   - the student's last_term is earlier than the latest plan last_term;
 *   - the student's outcome differs from the outcome of the matching plan.
 *
 * @param student student record to check
 * @returns false when a mismatch or a missing student last_term was reported,
 *          true otherwise (a plan lacking its last_term is reported but does not
 *          by itself make the result false)
 */
function validate_student_last_term_consistency(student) {

	var has_last_term = (student.last_term != null && validate_type('term',student.last_term));

	// a student without academic_plans is treated as having zero plans here, so the
	// plan counts below cannot throw (the absence is reported by validate_student_plans)
	var plans = (student.academic_plans != null) ? student.academic_plans : {};
	var plan_keys = Object.keys(plans);

	// one entry per plan that has a last_term: { term, months, outcome }
	var last_terms = [];

	for (var i = 0; i < plan_keys.length; i++ ) {

		var plan = plans[plan_keys[i]];

		if (plan != null && plan['last_term'] != null) {
			last_terms.push({
				term: plan['last_term'],
				months: term2months(plan['last_term']),
				outcome: (plan['outcome'] != null) ? plan['outcome'] : null
			});
		}

	} // next plan

	if (!has_last_term) {

		if (last_terms.length == 0) { return true; }

		if (last_terms.length == plan_keys.length) {
			critical_issue_key(student,'missing','student','last_term',"last term missing but has at least one plan[last_term]");
			return false;
		}

		return true;
	}

	if (last_terms.length != plan_keys.length) {
		critical_issue_key(student,'missing','student','last_term',"student has last term, but at least one plan is missing its last_term");
	}

	// declared locally; this used to leak as an implicit global
	var student_last_months = term2months(student.last_term);
	var max_months = 0;
	var matches_plan = -1;

	for (var j = 0; j < last_terms.length; j++) {

		if (student_last_months == last_terms[j].months) {
			matches_plan = j;
		}
		if (last_terms[j].months > max_months) {
			max_months = last_terms[j].months;
		}

	}

	if (matches_plan < 0) {
		critical_issue_key(student,'mismatch ','student','last_term',"last term doesn't match any one plan last_terms");
		return false;
	}

	if (student_last_months < max_months) {
		critical_issue_key(student,'mismatch ','student','last_term',"last term earlier than at least one plan last_terms");
		return false;
	}

	if (student.outcome != null && last_terms[matches_plan].outcome != null) {
		if (student.outcome != last_terms[matches_plan].outcome) {
			critical_issue_key(student,'mismatch ','student','outcome',"student outcome doesn't match plan outcome");
			return false;
		}
	}

	return true;

} // validate_student_last_term_consistency

/* ------------------------------------------------------------------------------- */

/**
 * Compare each plan's terms with the values imported from Judy Bowers' old files.
 *
 * A critical issue is recorded when a plan has an admit_bowers value that differs
 * from its admit_term, or (for plans that have a last_term) a complete_bowers
 * value that differs from its last_term. A mismatch means someone should go back
 * and verify which term is correct. Students without academic_plans are skipped.
 *
 * @param student student record to check
 */
function validate_bowers_fields(student) {

	if (typeof student.academic_plans === 'undefined' || student.academic_plans == null) {
		return;
	}

	Object.keys(student.academic_plans).forEach(function (code) {

		var entry = student.academic_plans[code];

		var has_admit_bowers = (typeof entry['admit_bowers'] != 'undefined' && entry['admit_bowers'] != null);
		if (has_admit_bowers && entry['admit_bowers'] != entry['admit_term']) {
			critical_issue_key(student,'mismatch',code,'admit_bowers',"admit_bowers  doesn't match admit_term");
		}

		var has_last_term = (typeof entry['last_term'] != 'undefined' && entry['last_term'] != null);
		var has_complete_bowers = (typeof entry['complete_bowers'] != 'undefined' && entry['complete_bowers'] != null);
		if (has_last_term && has_complete_bowers && entry['complete_bowers'] != entry['last_term']) {
			critical_issue_key(student,'mismatch',code,'complete_bowers',"complete_bowers  doesn't match last_term");
		}

	});

} // validate_bowers_fields


/* ------------------------------------------------------------------------------- */

// Run every validation over each student, honouring the -p / -a filters.
for (var student_index = 0; student_index < allstudents.length; student_index += 1) {

	var this_student = allstudents[student_index];

	// a student with no last_term is still enrolled ("current"); otherwise an alum
	var is_enrolled = (typeof this_student.last_term == 'undefined' || this_student.last_term == null);

	// -p keeps only current students, -a keeps only alums
	var is_filtered_out = (limit_to_current_students && !is_enrolled) || (limit_to_alums && is_enrolled);
	if (is_filtered_out) {
		continue;
	}

	['admin', 'personal', 'academic'].forEach(function (category) {
		validate_student_category(this_student, category);
	});

	validate_student_plans(this_student);

	validate_student_last_term_consistency(this_student);

	validate_bowers_fields(this_student);

} // next student


// The first entry of issues is the CSV header row, so it is not counted as an issue.
console.log("Found " + Math.max(issues.length - 1, 0) + " issues");

// Write all rows (header first) to the issues CSV next to the jekyll data.
var issues_file_path = __dirname + '/jekyll/_data/issues_allstudents.csv';
var file = fs.createWriteStream(issues_file_path);
file.on('error', function(err) {
	// report the failure instead of silently producing no (or a partial) file
	console.error("Could not write " + issues_file_path + ": " + err.message);
	process.exitCode = 1;
});
issues.forEach(function(row) { file.write(row); });
file.end();