var retext = require('retext');
var keywords = require('retext-keywords');
var fs = require('fs');
var nlcstToString = require('nlcst-to-string');
/**
 * Words that are too generic in legislative text to be useful as tags.
 * They are removed from the bill body before keyword extraction.
 */
var EXCLUDED_TAG_WORDS = ["paragraph", "none", "america", "congress", "united", "states", "bill", "act", "senate", "representatives", "house", "quorum", "session", "committee", "report", "legislation"];

/**
 * Matches any excluded word as a WHOLE word (optionally followed by a plural
 * "s"/"es"). The word boundaries matter: stripping plain substrings would turn
 * "contract" into "contr" and "billion" into "ion", producing junk tags.
 * Built once; String.prototype.replace resets lastIndex, so reuse is safe.
 */
var EXCLUDED_TAG_WORDS_PATTERN = new RegExp("\\b(?:" + EXCLUDED_TAG_WORDS.join("|") + ")(?:e?s)?\\b", "gi");

/**
 * Reduces a bill body to plain words suitable for keyword extraction.
 * @param {string} body - Raw bill body text
 * @return {string} - Body containing only letters, spaces and newlines, without excluded words
 */
function buildTagCorpus(body){
	return body
		.replace(/\(\w+\)/gi, "") //Drop enumerators such as "(a)" or "(1)"
		.replace(/[^a-z\n ]/gi, "") //Strip away everything that is not a letter, space or newline
		.replace(EXCLUDED_TAG_WORDS_PATTERN, "")
		.replace(/ {2,}/g, " "); //Collapse runs of spaces, including gaps left by removed words
}

/**
 * Runs the keyword extraction pipeline over a prepared corpus.
 * @param {string} corpus - Text produced by buildTagCorpus
 * @param {number} maxTags - Maximum number of tags to return
 * @return {string[]} - Lower-cased tags, at most maxTags of them
 */
function extractTags(corpus, maxTags){
	var tags = [];
	var processingError = null;
	//The results are read right after process() returns, so this relies on the
	//callback having already run by then.
	//NOTE(review): that holds only while every plugin in the pipeline is synchronous - confirm for the installed retext version.
	retext().use(keywords, {maximum: maxTags}).process(corpus, function (err, file) {
		if (err) {
			//Record instead of throwing from inside the library's callback
			processingError = err;
			return;
		}
		file.data.keywords.forEach(function (keyword) {
			tags.push(nlcstToString(keyword.matches[0].node).toLowerCase());
		});
	});
	if (processingError) throw processingError;
	//Enforce the cap here as well, in case the plugin reports more keywords than requested
	return (tags.length > maxTags) ? tags.slice(0, maxTags) : tags;
}

/**
 * Takes a printedBill (from printBill.js) and "tags" it: generates an array of
 * lower-cased tags from the bill body and passes it to the callback.
 * @param {object} bill - JSON bill (result from printBill.js); must have "head" and "body" properties
 * @param {function} callback - Called with a single argument: the array of generated tags
 * @param {object} options - Options (Optional)
 * @param {number} options.maxTags - Maximum number of tags to be passed to the callback (default: 10)
 * @return {undefined} - Nothing is returned; the tags are delivered through the callback
 * @throws {TypeError} - If options.maxTags is not a number, bill is not a non-null object,
 * callback is not a function, or bill lacks "head" or "body"
 * @throws {Error} - "JSON supplied must be a valid bill" if the body cannot be processed
 * (the underlying error is attached as the "cause" property)
 * @example
 * getBill("BILLSTATUS-115hr99", function(res){
 *   printBill(res, function(parsedResult){
 *     tagBill(parsedResult, function(tags){
 *       //tags is an array of tags
 *     });
 *   });
 * });
 */
var tagBill = function(bill, callback, options){
	options = options || {};
	var maxTags = ("maxTags" in options) ? options.maxTags : 10;
	if(typeof maxTags !== "number") throw new TypeError("'options.maxTags' must be a number");
	//typeof null is "object", so null has to be rejected explicitly
	if(bill === null || typeof bill !== "object") throw new TypeError("'bill' needs to be an object");
	if(typeof callback !== "function") throw new TypeError("A callback needs to be included");
	if(!("head" in bill) || !("body" in bill)) throw new TypeError("The bill supplied needs to be the output of printBill.js");

	var tags;
	try{
		tags = extractTags(buildTagCorpus(bill.body), maxTags);
	}
	catch(e){
		var invalidBill = new Error("JSON supplied must be a valid bill");
		invalidBill.cause = e; //Keep the underlying failure available for debugging
		throw invalidBill;
	}
	//Invoked outside the try/catch so that an exception thrown by the caller's
	//callback is not misreported as an invalid bill.
	callback(tags);
};
// Expose tagBill as this module's only export.
module.exports = tagBill;