Source: parser/tagBill.js

var retext = require('retext');
var keywords = require('retext-keywords');
var fs = require('fs');
var nlcstToString = require('nlcst-to-string');

/**
 * Takes a printedBill (from printBill.js) and "tags" it: builds an array of
 * autogenerated keyword tags from the bill's body and passes it to the callback.
 *
 * The body is reduced to plain letters, a fixed list of legislative boilerplate
 * words is removed, and retext-keywords extracts the keywords from what is left.
 *
 * @param {object} bill - JSON bill (result from printBill.js); must have `head` and `body` properties
 * @param {function(string[])} callback - Called once with the array of lower-cased tags
 * @param {object} [options] - Options (Optional)
 * @param {number} [options.maxTags=10] - Maximum number of tags to be returned
 * @returns {undefined} Nothing is returned; the tags are delivered through `callback`
 * @throws {TypeError} If `options.maxTags` is not a number, `bill` is not a non-null object,
 *   `callback` is not a function, or `bill` lacks `head`/`body`
 * @throws {Error} If the bill body cannot be processed; the underlying error is attached as `cause`
 * @example
 * getBill("BILLSTATUS-115hr99", function(res){
 *   printBill(res, function(parsedResult){
 *     tagBill(parsedResult, function(tags){
 *        //tags is an array of tags
 *     });
 *   });
 * });
 */

var tagBill = function(bill, callback, options){
  options = options || {};
  var maxTags = ("maxTags" in options) ? options.maxTags : 10;

  if(typeof maxTags !== "number") throw new TypeError("'options.maxTags' must be a number");
  // typeof null is "object", so null has to be rejected explicitly before the `in` checks below.
  if(typeof bill !== "object" || bill === null) throw new TypeError("'bill' needs to be an object");
  // Reject anything that cannot be invoked now, instead of failing later when the tags are delivered.
  if(typeof callback !== "function") throw new TypeError("A callback needs to be included");
  if(!("head" in bill) || !("body" in bill)) throw new TypeError("The bill supplied needs to be the output of printBill.js");

  var wordsToExcludeFromTags = ["paragraph", "none", "america", "congress", "united", "states", "bill", "act", "senate", "representatives", "house", "quorum", "session", "committee", "report", "legislation"];

  // Match whole words only (with an optional plural "s") so that removing "act"
  // does not turn "contract" into "contr" or "enacted" into "ened".
  var excludedWords = new RegExp("\\b(?:" + wordsToExcludeFromTags.join("|") + ")s?\\b", "gi");

  var tags = [];

  try{
    var corpus = bill.body
    .replace(/\(\w+\)/gi, "")      // drop parenthesised word tokens such as "(a)" or "(1)"
    .replace(/[^a-z\n ]/gi, "")    // strip away everything that is not a letter, newline or space
    .replace(excludedWords, "")
    .replace(/ {2,}/g, " ");       // collapse the gaps left behind by the removals

    var processingError = null;

    // NOTE(review): the tags are read right after process() returns, which assumes
    // retext invokes this callback synchronously — confirm against the installed version.
    retext().use(keywords, {maximum: maxTags}).process(corpus, function (err, file) {
      if (err) {
        processingError = err;
        return;
      }

      file.data.keywords.forEach(function (keyword) {
        tags.push(nlcstToString(keyword.matches[0].node).toLowerCase());
      });
    });

    if (processingError) throw processingError;
  }
  catch(e){
    // Keep the original failure reachable for debugging instead of discarding it.
    var wrapped = new Error("JSON supplied must be a valid bill");
    wrapped.cause = e;
    throw wrapped;
  }

  if(tags.length > maxTags) {
    tags = tags.slice(0, maxTags);
  }

  // Invoked outside the try/catch so an exception raised by the caller's own
  // callback is not misreported as an invalid bill.
  callback(tags);
};

// Expose tagBill as this module's sole export (CommonJS).
module.exports = tagBill;