User:Closeapple/RMFset.js

Code that you insert on this page could contain malicious content capable of compromising your account. If you import a script from another page with "importScript", "mw.loader.load", "iusc", or "lusc", take note that this causes you to dynamically load a remote script, which could be changed by others. Editors are responsible for all edits and actions they perform, including by scripts. User scripts are not centrally supported and may malfunction or become inoperable due to software changes. A guide to help you find broken scripts is available. If you are unsure whether code you are adding to this page is safe, you can ask at the appropriate village pump.
This code will be executed when previewing this page.
Documentation for this user script can be added at User:Closeapple/RMFset.
Note: After saving, you have to bypass your browser's cache to see the changes. Google Chrome, Firefox, Microsoft Edge and Safari: Hold down the ⇧ Shift key and click the Reload toolbar button. For details and instructions about other browsers, see Wikipedia:Bypass your cache.
// Closeapple RMF set.js
// Functions used by Closeapple for
// http://meta.wikimedia.org/wiki/TemplateScript
//
// Copyright (C) 2009-2010 Closeapple
// You may copy and modify this file under your choice of these licenses:
// * GDFL: GNU Free Documentation License, version of your choice
// * CC-BY-SA: Creative Commons Attribution+ShareAlike, version 3.0
//   or later, U.S. or Unported
// * CC-BY-NC: Creative Commons Attribution+NonCommercialOnly, version 3.0
//   or later, U.S. or Unported
// * LGPL: Lesser GNU Public License, version of your choice
//
// This is a set of regex scripts to use on MediaWiki using TemplateScript
// described at [[Meta:TemplateScript]]. (You don't need to have it loaded
// separately, but you can enable it by going to [[Special:Preferences]] and
// checking in the Gadgets section.)
//
// Coding notes
// ------------
// This JavaScript uses UTF-8 characters like arrows and lines, and even in a
// few regexes for dashes and things.
// Shame on you if you load it in a non-Unicode editor!
// JavaScript regexes do support \uXXXX escapes (for example \u2014 for an
// em dash), but they are not used here yet.
//
// Since this is written for JavaScript, it uses the Perl extensions that
// JS supports, like (?:) for non-storing (), +? and *? for shortest
// matches (which are vital in some places!), and, in the future, (?!)
// for negative look-ahead.
// Also, / in a pattern is always written as \/ here.
//
// A plain editor.replace() is used instead of rmfCa_regex_reason() when
// affecting the edit summary is undesirable: for example, when a regex is
// likely to match even when nothing needs fixing

//  TODO: Rewrite most ([^whatever]|$) junk to use (?!whatever) instead
//  Wishlist:
//  * [[mnopqr|mnopqrs]] -> [[mnopqr]]s
//  * [[mnopqr]]'s -> [[mnopqr|mnopqr's]] (with weird apostrophes too)
//  * [[link_with_spaces]] -> [[link with spaces]] (but maybe that's bad)
//  Far-off wishlist:
//  * <ref name="whatever"></ref> -> <ref name="whatever"/> (maybe)
//  * [[Abcdef, IL|Abcdef]] -> [[Abcdef, Illinois|Abcdef]]
//  * [[Abcdef, IL|Abcdef, Illinois]] -> [[Abcdef, Illinois]]
/* global $, pathoschild */

//  true to debug; false to not debug
if (!rmfCa_debug) { var rmfCa_debug=
//  true;
  false;
}

/**
 * TemplateScript adds configurable templates and scripts to the sidebar, and adds an example regex editor.
 * @see https://meta.wikimedia.org/wiki/TemplateScript
 * @update-token [[File:pathoschild/templatescript.js]]
 */
// <pre>
$.ajax('//tools-static.wmflabs.org/meta/scripts/pathoschild.templatescript.js', { dataType:'script', cache:true }).then(function() {
	pathoschild.TemplateScript.add([
		/*TODO: It's not working!
		{
			name: 'Standardize',
			scriptUrl: 'meta:User:Pathoschild/standardise.js',
			script: function() { standardize(); }
		},
		*/
		{
			name: 'Safe cleanups',
			script: function(editor) {
				// do all the unquestionably appropriate cleanups here
				rmfCa_removetrailingspaces(editor);
				rmfCa_brcleaning(editor);
				rmfCa_refspacing(editor);
				rmfCa_moverefpunct(editor);
				rmfCa_wikilinkunderlines(editor);
				rmfCa_wikilinkspacing(editor);
				rmfCa_obviousdashdash(editor);
				rmfCa_htmltypos(editor);
				rmfCa_obviousreferences(editor);
				rmfCa_washington_obvious(editor);
			}
		},
		{
			name: '├ trailing spaces',
			script: rmfCa_removetrailingspaces
		},
		{
			name: '├ <br/> cleaning',
			script: rmfCa_brcleaning
		},
		{
			name: '├ [[_|x]]→[[ |x]]',
			script: rmfCa_wikilinkunderlines
		},
		{
			name: '├ [[ ]] spacing',
			script: 'rmfCa_wikilinkspacing'
		},
		{
			name: '├ safe -- fixes',
			script: rmfCa_obviousdashdash
		},
		{
			name: '├ <ref>. → .<ref>',
			script: rmfCa_moverefpunct
		},
		{
			name: '├ <ref> spacing',
			script: rmfCa_refspacing
		},
		{
			name: '├ =Ref= section',
			script: rmfCa_obviousreferences
		},
		{
			name: '├ HTML typos',
			script: rmfCa_htmltypos
		},
		{
			name: '└ safe Washington',
			script: rmfCa_washington_obvious
		},
		{
			name: 'Looser cleanups',
			script: function(editor) {
				rmfCa_prosedashdash(editor);
				rmfCa_unlinkfulldates(editor);
				rmfCa_unsubstreflist(editor);
				rmfCa_washington_loose(editor);
			}
		},
		{
			name: '├ Prose -- fixes',
			script: rmfCa_prosedashdash
		},
		{
			name: '├ [[]] → {{date}}',
			script: rmfCa_unlinkfulldates
		},
		{
			name: '├ unsubst {{reflist}}',
			script: rmfCa_unsubstreflist
		},
		{
			name: '└ looser Washington',
			script: rmfCa_washington_loose
		}
	]);
});

//  rmfCa_regex_reason: replaces regex with replacement, then adds
//  summary or detail to edit box if there was a match and unsets
//  "minor" flag if edit is major.
//  return values (not used by other functions, so not necessary):
//  * 0 if no match
//  * 1 if match but no changes (from regexes that broadly match things they
//    don't necessarily need to fix)
//  * 2 if match caused some kind of change
function rmfCa_regex_reason(editor, pattern, replacement, detail, summary, major) {
	if (!editor.get().match(pattern)) {
		if(rmfCa_debug)
			alert('0 = no match for '+detail);
		return 0;
	}
	var beforetext = editor.get();
	editor.replace(pattern, replacement);
	if (beforetext == editor.get()) {
		if(rmfCa_debug)
			alert('1 = matches but doesn\'t change: '+detail);
		return 1;
	}
	rmfCa_setreason(editor, detail, summary, major);
	if (!detail)
		detail = summary;
	if(rmfCa_debug)
		alert('2 = fixes: '+detail);
	return 2;
}
//  rmfCa_setreason: one-stop location to add edit summary
//  detail: detailed description of change; not in edit summary unless
//    change is major AND there is no summary
//  summary: edit summary to append if doesn't already exist
//  major = if string "true" or "major" then unset "minor edit" checkbog
function rmfCa_setreason(editor, detail, summary, major) {
	if ( major == 'true' || major == 'major' ) {
		editor.options({ minor: false });

		if (detail && !summary)
			summary = detail;
		
		editor.appendEditSummary(summary);
	}
	else {
		if (!summary)
			summary = 'minor wikification fixups';
		editor.appendEditSummary(summary);
	}
}

//
//==========
//  Actual regexes start here!
//==========
//

//  Aggressiveness: safe
function rmfCa_removetrailingspaces(editor) {
	rmfCa_regex_reason(editor, /[ \t]+$/mg, '', 'removed trailing spaces');
}

//  Aggressiveness: safe
function rmfCa_brcleaning(editor) {
	editor
		.replace(/<\s*BR\s*(?:\/s*)>/g, '<BR/>')  // if all caps, OK
		.replace(/<\s*(?:bR|[Bb]r)\s*(?:\/\s*)>/g, '<br/>'); // otherwise, lower case!
	// completely surrounded -> no spaces
	rmfCa_regex_reason(editor, /\s+(<br\/>)\s+/ig, '$1', 'spacing on both sides of <br/>');
	editor
		.replace(/[ \t]+[ \t](<br\/>)/ig, ' $1') // allow only one leading space
		.replace(/(<br\/>)[ \t]+[ \t]/ig, '$1 '); // allow only one trailing space
}

// moverefpunct: move punctuation to before (a sequence of) references
// Aggressiveness: safe
// Example: <ref>abc</ref>. -> .<ref>abc</ref>
// Scope: always
// Type: Perl/JavaScript (needs (?:) to do non-storing match and *? to select shortest)
// Works even across lines if [^<] matches linefeeds in regex implementation.
// Spacing within ref tags and reference content is passed and not modified.
// /m parameter may or may not be useful - works both ways.
// Does not deal with 
// "!" is not matched because refs show up in table headers and we don't
// want the ref to swap with the header separators.  Examples:
// ! Header A<ref>12</ref>
// ! Header B
// or ! Header A<ref>12</ref> !! Header B
// Note: will not see any <ref> sections with < inside the reference tags or
// with < or > inside the ref content itself.  For example:
// Will be skipped: <ref>12<34>56</ref>
// Will be skipped: <ref<!-- test -->>123456</ref>
// TODO: Possibly match {{dated info}}, {{fact}}, etc. the same way.
function rmfCa_moverefpunct(editor) {
	// TESTING next line
	rmfCa_regex_reason(editor, /\s*((?:[.?,:;]|°|&deg;)+)\s*((?:<ref(?:\s[^/<>]*\/|(?:\s[^<>]*?)?\s*>[^<]*<\s*\/\s*ref)\s*>\s*?)+)[ \t]*\1+/ig, '$1$2', 'limit duplicate punctuation to before <ref>', 'punctuation to before <ref>s');
	// already tested
	// old version:
	// rmfCa_regex_reason(editor, /\s*((?:<ref(?:\s[^/<>]*\/|(?:\s[^<>]*?)?\s*>[^<]*<\s*\/\s*ref)\s*>\s*?)+)\s*((?:[.?!,:;]|°|&deg;)+)/ig, '$2$1', 'move punctuation to before <ref>', 'punctuation to before <ref>s');
	// newer version that isn't supposed to catch \n[:;] but still does
	// TODO: Figure out why \n[:;] still matches!
	rmfCa_regex_reason(editor, /\s*((?:<ref(?:\s[^/<>]*\/|(?:\s[^<>]*?)?\s*>[^<]*<\s*\/\s*ref)\s*>\s*?)+)(?:\s*((?:[.?!,]|°|&deg;)+)|[ \t]*([;:]+))/ig, '$2$3$1', 'move punctuation to before <ref>', 'punctuation to before <ref>s');
}

//  refspacing: clean spaces in and around ref tags
//  Aggressiveness: safe
//  Note: Will not match any <ref> tags with < or > inside the ref tag
//  itself.  For example, will skip <ref<!-- test -->>
function rmfCa_refspacing(editor) {
	//  clean spaces in opening <ref> or standalone <ref/>
	//  Allows (optional) one space between last parameter and /> in case
	//  some people like that form.
	//  Does not yet clean spaces BETWEEN paramaters if that were to happen.
	//  Note: matches tags whether already cleaned or not
	rmfCa_regex_reason(editor, /<ref\s*(\s[^<>]*[^/<>\s]|)\s*?((?:\s?\/)?)\s*>/ig, '<ref$1$2>', '< ref > → <ref>');
	//  Remove spaces between a non-wiki character and <ref
	//  except after ! !- | |- because some people might like table alignment
	//  Only matches if there's a space to clean.
	rmfCa_regex_reason(editor, /(^-?|[^!|-]|[^!|]-)\s+<ref([^<>\w]*|\W[^<>]*)>/ig, '$1<ref$2>', 'rm spaces before <ref>s');
	//  clean spaces in and before closing </ref>
	//  Does not remove newline before </ref> - some people might like that.
	//  Note: matches tags whether already cleaned or not
	rmfCa_regex_reason(editor, /[ \t]<\s*\/\s*ref\s*>/ig, '</ref>', '< / ref > → </ref>');
}

//  wikilinkspacing: removes extraneous spaces in [[ ]] tags
//  Aggressiveness: safe
function rmfCa_wikilinkspacing(editor) {
	//  Category: eliminate spaces between [[ and any | instead of moving outside
	//  Also has side effect of capitalizing [[Category: correctly.
	//  Note: matches whether already cleaned or not
	rmfCa_regex_reason(editor, /\[\[[ \t]*Category[ \t]*:[ \t]*([^\]|]*[^\]|\s][ \t]*)/ig, '[[Category:$1', 'category spacing 1');
	//  Interlanguage: two-letter codes other than WP
	//  eliminate spaces instead of moving outside
	//  TODO: add more three-letter language codes
	rmfCa_regex_reason(editor, /\[\[[ \t]*([a-vx-z][a-z]|[a-z][a-oq-z]|simple|ang)[ \t]*:[ \t]/ig, '[[$1:', 'interwiki spacing 1');
	//  Protect special case of "[[Category:Something| ".
	//  Note: Matches based on assumption that "[[Category:Something|" has
	//  already had its spaces cleaned out above.
	//  Note: REQUIRES conversion back later in this fuction.
	//  Note: Turns multiple prefix spaces into a single one.
	//  Note: JavaScript in Firefox 3.5.5 parses this regex to mean something
	//  different if we use the [^]|] as part of this pattern, so we use
	//  [^\]|] instead to mean "anything but ] or |".
	editor.replace(/(\[\[Category:[^\]|]+\|)[ \t]+/g, '$1%%rmfCaSpace%%');
	//  Category/Interlanguage: eliminate spaces just before ]] instead of
	//  moving outside
	//  only matches when there's an actual space before ]]
	//  Note: Depends on initial spacing being eliminated above already.
	//  TODO: add more three-letter language codes
	rmfCa_regex_reason(editor, /\[\[[ \t]*(Category|[a-vx-z][a-z]|[a-z][a-oq-z]|simple|ang):([^]]+[^]\s]*)[ \t]+\]\]/ig, '[[$1:$2]]', 'category spacing 2');
	// For other [[ ]], move spaces to outside of brackets
	// only matches when there's an actual space just inside [[ or [[: or ]]
	// Depends on exceptions already being eliminated above.
	// Skips [[+space+: because that acts strangely on Wikipedia.
	rmfCa_regex_reason(editor, /[ \t]*\[\[(:?)[ \t]+([[^ \t:]])/g, ' [[$1$2', 'wikilinks starting with space');
	rmfCa_regex_reason(editor, /[ \t]+\]\][ \t]*/g, ']] ', 'wikilinks ending with space');
	//  Turn protected spaces back into normal spaces.
	editor.replace(/%%rmfCaSpace%%/g, ' ');
}

//  wikilinkunderlines: remove _ from targets in wikilinks
//  Aggressiveness: intended to be safe
//  IN TESTING - need to determine if it can remove more than one _
//  Removes _ other than at the beginning, end, or next to another _.
//  Designed to work ONLY if visible part of link does not contain _ also.
function rmfCa_wikilinkunderlines(editor) {
	//  Dang - doesn't match more than one _.
	//  rmfCa_regex_reason(editor, /\[\[\s*([^\]|]*[^\]|_])_([^\]|_][^\]|]*)\s*\|\s*([^\]_]+)\s*\]\]/g, '[[$1 $2|$3]]', 'rm _ from wikilink target');
	//  Dang - the next one doesn't match more than one _ either!
	rmfCa_regex_reason(editor, /\[\[\s*([^\]|]*[^\]|_])_(?=[^\]|_][^\]|]*\|[^\]_]+\]\])/g, '[[$1 ', 'rm _ from wikilink target');
}

//  obviousdashdash: multiple hypens = &mdash; ("safe" matches)
//  Aggressiveness: safe
//  We're actually very limited here because we have to avoid these:
//  Linux command line parameters:
//    space + -- + alphanum
//  C language arithmetic:
//    alphanum/(/)/[/]/*/& + --
//    -- + alphanum/(/)/[/]/*/&
//  Note: Also, some URL might be stupid enough to have two hyphens, so we
//  should exclude any non-space strings with / before the double hypens.
//  It calls commondashdash at the end for completeness.
//  TODO: Could have lots more combinations
function rmfCa_obviousdashdash(editor) {
	// linestart/space + string of non-slash + digit + DASHES + digit
	// avoiding slash to avoid possible idiocy like 1--2 in URLs
	rmfCa_regex_reason(editor, /(^|\s[^\/\s]+)(\d\s?)\s*?--+(\s?)\s*?(\d)/g, '$1$2&mdash;$3$4', 'digit--digit→&mdash;' );
	// pretty safe: stuff in QUOTE or BLOCKQUOTE with no other <>
	rmfCa_regex_reason(editor, /<\s*((?:BLOCK)?QUOTE)\s*>([^<>]*)--([^<>]*)<\s*\/\s*((?:BLOCK)?QUOTE)\s*>/ig, '<$1>$2 &mdash; $3</$4>', '-- → &mdash; in quote');
	rmfCa_commondashdash(editor);
}

//  prosedashdash: multiple hypens = &mdash; (when all prose)
//  Aggressiveness: prose; assumes no command lines or programming code
//  This one assumes that there are no command lines or C programming
//  code embedded in the text, so it can be a little more aggressively
//  general.
//  It calls commondashdash at the end for completeness.
//  TODO: Find a more clever way than [^>][^>] to make sure that
//  <!----- something -----> doesn't turn into
//  <!----- something ---&mdash;->
function rmfCa_prosedashdash(editor) {
	// space on the front is spaced dash for sure unless <!-- -->
	rmfCa_regex_reason(editor, /\s+--+\s*([^>][^>]|$)/g, ' &mdash; $1');
	// linestart/space + string of non-/ non-! + hyphens is dash
	// Avoiding ! because otherwise <!-- comments --> get mangled!
	// Avoiding > on the end for the same reason.
	// Avoiding slash to avoid possible idiocy like a--b in URLs.
	// Allows one space to survive after the dash.
	rmfCa_regex_reason(editor, /(^|\s[^\/!\s]+)--+(\s?)\s*([^>][^>]|$)/g, '$1&mdash;$2$3', '-- → &mdash;', '-- → &mdash; in prose');
	rmfCa_commondashdash(editor);
}

//  commondashdash: multiple hypens = &mdash; (whether safe or prose)
//  Aggressiveness: safe
//  This has the regexes that bother obviousdashdash and prosedashdash
//  (safe and less safe) call, so that we don't have to double-process
//  "safe" things in obviousdashdash that have already been handled by
//  more generalized matches in other dashdash handlers.
function rmfCa_commondashdash(editor) {
	// NOT pretty safe: at least one mdash/ndash in a row of mdash/ndash/hyphens
	// with no inner spaces
	// (We'll be brave and assume anyone who uses the table construct
	// |- + mdash/ndash with no space between deserves what they get.)
	// TODO: Find a way for this to NOT match <!--(dashes)-->
	// Commented out until then.
	//  rmfCa_regex_reason(editor, /\s*(?:(?:&[mn]dash;|-|–|—)+(?:&[mn]dash;|–|—)+|(?:&[mn]dash;|–|—)+(?:&[mn]dash;|-|–|—)+)\s*/g, ' &mdash;', 'string of dashes → single &mdash;', 'string of dashes → single &mdash;');
}

//  htmltypos: fix common HTML typos and screwups
//  Aggressiveness: safe
function rmfCa_htmltypos(editor) {
	// forgotten semicolon on HTML entities
	rmfCa_regex_reason(editor, /&([gl]t|[mn]dash|nbsp)(?:[^;];?)([^;]|$)/ig, '&$1;$2', 'missing ; on HTML entity');
	// wrong capitalization on HTML entities
	rmfCa_regex_reason(editor, /&M(?:dash|DASH);/, '&mdash;', '&MDASH→&mdash;' );
	rmfCa_regex_reason(editor, /&N(?:dash|DASH);/, '&ndash;', '&NDASH→&ndash;' );
	rmfCa_regex_reason(editor, /&N(?:bsp|BSP);/, '&nbsp;', '&NBSP→&nbsp;' );
}

//  unlinkfulldates: remove [[ ]] from full dates
//  UNTESTED but complete
//  Aggressiveness: mostly OK if it's this MediaWiki's house policy
//  Does not recognize any way of marking dates as "supposed to be linked".
//  However, it only unlinks FULL dates, not isolated Year or Month-Day.
//  Note: Only converts month names if English
//  TODO: Make it skip prefix/suffix for when on a date= parameter.
function rmfCa_unlinkfulldates(editor) {
	rmfCa_unlinkfulldates_template(editor, '{{date|', '}}');
}
function rmfCa_unlinkfulldates_template(editor, prefix, suffix) {
	//  U.S. style: [[January 1]](,)(of) [[2345]] or 2345
	rmfCa_regex_reason(editor, /\[\[[ \t]*(J(?:an(?:uary)?|u(?:ne?|ly?))|Feb(?:ruary)?|Ma(?:r(?:ch)?|y)|A(?:pr(?:il)?|ug(?:ust)?)|Sep(?:t(?:ember)?)?|Oct(?:ober)?|(?:Nov|Dec)(?:ember)?)[ \t]+([0-2]?\d|3[01])(?:st|[nr]?d|th)?(?:[ \t]*\]\])?[,\s]+(?:(?:in|of)\s+)?(?:\[\[[ \t]*)?(\d{3,4})[ \t]*\]\]/mig, prefix+'$1 $2, $3'+suffix, 'unlink U.S. dates', '[[MOS:UNLINKDATES]]');
	//  European style: [[1 January]](,)(of) [[2345]] or 2345
	rmfCa_regex_reason(editor, /\[\[[ \t]*([0-2]?\d|3[01])(?:st|[nr]?d|th)?[ \t]+(J(?:an(?:uary)?|u(?:ne?|ly?))|Feb(?:ruary)?|Ma(?:r(?:ch)?|y)|A(?:pr(?:il)?|ug(?:ust)?)|Sep(?:t(?:ember)?)?|Oct(?:ober)?|(?:Nov|Dec)(?:ember)?)(?:[ \t]*\]\])?[,\s]+(?:(?:in|of)\s+)?(?:\[\[[ \t]*)?(\d{3,4})[ \t]*\]\]/mig, prefix+'$1 $2 $3'+suffix, 'unlink European dates', '[[MOS:UNLINKDATES]]');
	//  ISO 8601: [[2345-06-07]] or [[2345]]-[[06-07]]
	//  rmfCa_regex_reason(editor, /\[\[\s*(\d\d\d\d)(?:\s*\]\])?-(?:\[\[\s*)(\d\d?)(?:\s*\]\])?-(?:\[\[\s*)(\d\d?)\s*\]\]/g, prefix+'$1-$2-$3'+suffix, 'unlink ISO dates', '[[MOS:UNLINKDATES]]');
	rmfCa_regex_reason(editor, /\[\[[ \t]*(\d{3,4})(?:[ \t]*\]\])?\s*-\s*(?:\[\[[ \t]*)?(0?[2-9]|1[012]?)[ \t]*-[ \t]*([0-2]?\d|3[01])[ \t]*\]\]/mg, prefix+'$1-$2-$3'+suffix, 'unlink ISO-8601 dates', '[[MOS:UNLINKDATES]]');
}

//  obviousreflist: references section changes when obviously correctable
//  Scope: Wikipedia - requires {{reflist}} template to exist on wiki
//  Aggressiveness: safe
//  UNTESTED
function rmfCa_obviousreferences(editor) {
	//  "References" or "Sources" section header, followed immediately by
	//  either <references> with parameters or any {{reflist}},
	//  gets the following cleanups:
	//  * "Sources" section name changes to "References"
	//  * equals signs and spaces get balanced on that section header
	//  Parameters to <references> or {{reflist}} are passed through unchanged.
	//  May match even when not making changes.
	editor.replace(/^(=+)([ \t]?)[ \t]*(?:Refe?ren|Sour)ces?[ \t]*=+[ \t]*$\s*(\{\{reflist[^<>}]*\}\}|<\s*references(?:\s+[^\/<>\s][^>]+>))/gim, '$1$2References$2$1\n$3');
	//  "References" or "Sources" section header, followed immediately by
	//  <references> with no parameters, gets the same as above,
	//  plus <references> is turned into {{reflist}}.
	rmfCa_regex_reason(editor, /^(=+)([ \t]?)[ \t]*(?:Refe?ren|Sour)ces?[ \t]*=+[ \t]*$\s*<\s*references[\/\s]*>/gim, '$1$2References$2$1\n{{reflist}}', 'References section fixup');
}

//  unsubstreflist: turn things like <small><div><references></div></small>
//  and <small><div>{{reflist}}</div></small> into just {{reflist}}
//  Scope: Wikipedia - requires {{reflist}} template to exist on wiki
//  Aggressiveness: slightly; see warning below
//  UNTESTED
//
//  Warning: This pattern doesn't know how to BALANCE start and ending
//  <div> and <small> tags - it just removes matching ones contiguous
//  before and after references.  So if someone is silly enough to put
//  those tags contiguous to the references on one side but not the other,
//  this pattern will blast the ones contiguous to the references anyway,
//  causing the tags to become unbalanced!
//  Note: If <references> contains parameters, no conversion is done,
//  because the pattern doesn't know how to convert those parameters into
//  {{reflist}} parameters.  An existing {{reflist}} with parameters is
//  still matched, since the parameters don't have to be converted then.
function rmfCa_unsubstreflist(editor) {
	rmfCa_regex_reason(editor, /(?:<(?:div(?:\s+(?:class="(?:\s*references-[-\w])+"|style="(?:[-\w]*column-[-\w]*:[\w\s]*;)\s*"))*|small)>\s*)+(<\s*references[^\/<>]*\/[\s*]>|\{\{reflist[^\}<>]*\}\})(?:\s*<\s*\/\s*(div|small)\s*>)+/ig, '$1', 'unsubst/cleanup {{reflist}}');
	//  optional: run rmfCa_obviousreferences() now that References section
	//  might have gotten cleaned up.
	rmfCa_obviousreferences(editor);
}

//  washington_obvious: disambig obvious meanings of [[Washington]]
//  IN TESTING
function rmfCa_washington_obvious(editor) {
	//  State of [[Washington]]
	//  Warning: Assumes [[Washington]] is an old link to the state; could nail
	//  "state of [[Washington]], D.C." or "state of [[Washington]]'s mind"
	//  in a sentence if Washington was improperly linked to begin with.
	rmfCa_regex_reason(editor, /([Ss])tate(s?) of \[\[Washington\]\]/g, '$1tate$2 of [[Washington (U.S. state)|Washington]]', 'state of [[Washington (U.S. state)]]', 'disambig [[Washington (U.S. state)]]');
	//  Governor of [[Washington]]
	//  Same as previous one, basically, but not much chance of false positives.
	rmfCa_regex_reason(editor, /([Gg])overnor(s?) of \[\[Washington\]\]/g, '$1overnor$2 of [[Washington (U.S. state)|Washington]]', 'governor of [[Washington (U.S. state)]]', 'disambig [[Washington (U.S. state)]]');
	//  [[Washington|State of Wa(sh(ington))]]
	//  rmfCa_regex_reason(editor, /\[\[Washington\s*\|\s*([Ss]tate of W[Aa](?:sh(?:ington))\.?)/ig, '[[Washington (U.S. state)|$1', 'Washington|State of $1', 'disambig [[Washington (U.S. state)]]');
	//  [[Washington|something containing "state"]]
	rmfCa_regex_reason(editor, /\[\[Washington\s*\|\s*([^]]*state)/ig,  '[[Washington (U.S. state)|$1', 'Washington|state', 'disambig [[Washington (U.S. state)]]');
	//  [[Washington]] state
	rmfCa_regex_reason(editor, /\[\[Washington\]\] ([Ss])tate\b/g, '[[Washington (U.S. state)|Washington $1tate]]', '[[Washington state]]', 'disambig [[Washington state]]');
	//  [[Washington|WA]] or [[Washington|Wa.]] or [[Washtington|Wa(sh). State]]
	// (but not "[[Washington|Wash.]]" by itself)
	rmfCa_regex_reason(editor, /\[\[Washington\s*\|\s*(W(?:[Aa](?:\.?|(?:sh(?:ington))\.? [Ss]tate))(\s+[^]]+)?)\s*\]\]/g, '[[Washington (U.S. state)|$1]]', 'disambig [[Washington (U.S. state)]] abbreviation', 'disambig [[Washington (U.S. state)]]');
	//  [[Seattle]], [[Washington]] (a U.S. city not requiring state name)
	rmfCa_regex_reason(editor, /\[\[\s*Seattle(?:,? ?(?:WA|Washington)?(?:\|\s*Seattle\s*)?)?\]\]\s*(,|\sin)\s*\[\[Washington\]\]/g, '[[Seattle]]$1 [[Washington (U.S. state)|Washington]]', 'disambig [[Seattle]], [[Washington (U.S. state)]]');
	//  [[Something, Washington|Something]],/in [[Washington]] besides Seattle
	rmfCa_regex_reason(editor, /\[\[\s*([^]]+), W(?:A|ashington)\|\s*\1\s*\]\]\s*(,|\sin)\s*\[\[Washington\]\]/g, '[[$1, Washington|$1]]$2 [[Washington (U.S. state)|Washington]]', 'X,/in [[Washington (U.S. state)]]', 'disambig [[Washington (U.S. state)]]');
	//  some western state "&"/"and"/"or" some direction in [[Washington]]
	//  Working: tested on [[Oregon Penutian languages]]
	rmfCa_regex_reason(editor, /((?:(?:Alask|British Columbi|Montan)a|Idaho|Oregon|Utah)(?:\]\])?,?\s+(?:&|and|or)\s+(?:(?:[Nn]or|[Ss]ou)th-?)?(?:(?:[Ee]a|[Ww]e)st)?ern)\s+\[\[Washington\]\]/g, '$1 [[Washington (U.S. state)|Washington]]', 'state and E/N/S/W Washington', 'disambig [[Washington (U.S. state)]]');
}

//  washington_loose: disambiguations with less reliability
//  IN TESTING
function rmfCa_washington_loose(editor) {
	// fix parameters like
	// |*state=Washington -> |*state=[[Washington (U.S. state)|Washington]]
	rmfCa_regex_reason(editor, /(\|\s*\w+\s*[Ss]tate\d*\s*=\s*)(?:Washington|\[\[Washington\]\])(\s|\|)/g, '$1[[Washington (U.S. state)|Washington]]$2', 'Washington in state parameter', 'disambig [[Washington (U.S. state)]] in parameters');
	//  Washington D.C.: Ambassador of/embassy in [[Washington]]
	rmfCa_regex_reason(editor, /([Aa]mbassador\s+(?:in|of|to)|(?:[Cc]onsulate|[Ee]mbassy)\s+in)\s+\[\[Washington\]\]/g, '$1 [[Washington, D.C.|Washington]]', 'ambassador/consulate/embassy in [[Washington, D.C.]]', 'disambig [[Washington, D.C.]]');
	//  { { TOCStates } } using [[Washington]] as a title
	//  Failing: Can't seem to get newline to work before ==
	//  rmfCa_regex_reason(editor, /(\{\{TOCStates\}\}.*\s)(==+)\s*\[\[\s*Washington\s*\]\]\s*\2/, '$1$2[[Washington (U.S. state)|Washington]]$2', 'TOCStates header means [[Washington (U.S. state)]]', 'disambig [[Washington, D.C.]]');
}
// </pre>