ヘボン式変換 - javascript 2014/01/12

ひらがなをヘボン式ローマ字に変換します。はじめにJavaで実装していたので、それをJavaScript版に置き換えてみました。


実装については、いろいろ参考にしつつ、基本的には前回のJava版を置き換える感じでやっています。

以下、実装ソース
/* Feel free to use this. deiji.jp */

/**
 * Installs `String.prototype.toHebon()`, which converts hiragana in a string
 * to Hepburn-style romaji using the upper-case spellings in the table below.
 *
 * Rules applied by the conversion:
 *   - two-character kana (e.g. "きゃ") are looked up before single kana;
 *   - a small "っ" doubles the first letter of the next syllable, or becomes
 *     "T" when the next syllable starts with "CH";
 *   - "ん" becomes "M" before B / M / P and "N" otherwise;
 *   - a syllable that would form AA, II, UU, EE, OO or OU together with the
 *     previous one is dropped (long vowels are not written);
 *   - "ー" is dropped;
 *   - characters that are not in the table are copied through unchanged.
 *
 * Side effects: defines `String.prototype.toHebon`, and defines
 * `String.prototype.startsWith` when the runtime does not provide one.
 * All other state is local to this function; nothing is written to the
 * global object.
 */
function installHebon() {
  "use strict";

  var SOKUON = "っ";

  // [kana, romaji] pairs.
  var pairs = [
    ["あ", "A"], ["い", "I"], ["う", "U"], ["え", "E"], ["お", "O"],
    ["か", "KA"], ["き", "KI"], ["く", "KU"], ["け", "KE"], ["こ", "KO"],
    ["さ", "SA"], ["し", "SHI"], ["す", "SU"], ["せ", "SE"], ["そ", "SO"],
    ["た", "TA"], ["ち", "CHI"], ["つ", "TSU"], ["て", "TE"], ["と", "TO"],
    ["な", "NA"], ["に", "NI"], ["ぬ", "NU"], ["ね", "NE"], ["の", "NO"],
    ["は", "HA"], ["ひ", "HI"], ["ふ", "FU"], ["へ", "HE"], ["ほ", "HO"],
    ["ま", "MA"], ["み", "MI"], ["む", "MU"], ["め", "ME"], ["も", "MO"],
    ["や", "YA"], ["ゆ", "YU"], ["よ", "YO"],
    ["ら", "RA"], ["り", "RI"], ["る", "RU"], ["れ", "RE"], ["ろ", "RO"],
    ["わ", "WA"], ["ゐ", "I"], ["ゑ", "E"], ["を", "O"],
    ["ん", "N"],
    ["が", "GA"], ["ぎ", "GI"], ["ぐ", "GU"], ["げ", "GE"], ["ご", "GO"],
    ["ざ", "ZA"], ["じ", "JI"], ["ず", "ZU"], ["ぜ", "ZE"], ["ぞ", "ZO"],
    ["だ", "DA"], ["ぢ", "JI"], ["づ", "ZU"], ["で", "DE"], ["ど", "DO"],
    ["ば", "BA"], ["び", "BI"], ["ぶ", "BU"], ["べ", "BE"], ["ぼ", "BO"],
    ["ぱ", "PA"], ["ぴ", "PI"], ["ぷ", "PU"], ["ぺ", "PE"], ["ぽ", "PO"],
    ["きゃ", "KYA"], ["きゅ", "KYU"], ["きょ", "KYO"],
    ["しゃ", "SHA"], ["しゅ", "SHU"], ["しょ", "SHO"],
    ["ちゃ", "CHA"], ["ちゅ", "CHU"], ["ちょ", "CHO"],
    ["にゃ", "NYA"], ["にゅ", "NYU"], ["にょ", "NYO"],
    ["ひゃ", "HYA"], ["ひゅ", "HYU"], ["ひょ", "HYO"],
    ["みゃ", "MYA"], ["みゅ", "MYU"], ["みょ", "MYO"],
    ["りゃ", "RYA"], ["りゅ", "RYU"], ["りょ", "RYO"],
    ["ぎゃ", "GYA"], ["ぎゅ", "GYU"], ["ぎょ", "GYO"],
    ["じゃ", "JA"], ["じゅ", "JU"], ["じょ", "JO"],
    ["びゃ", "BYA"], ["びゅ", "BYU"], ["びょ", "BYO"],
    ["ぴゃ", "PYA"], ["ぴゅ", "PYU"], ["ぴょ", "PYO"],
    /* These two characters get special treatment in toHebon(). */
    ["ー", ""], [SOKUON, SOKUON]
  ];

  // Prototype-less dictionary: a plain array/object would also answer for
  // inherited names (e.g. "at"), turning ordinary input text into a
  // non-string "conversion".
  var table = Object.create(null);
  for (var i = 0; i < pairs.length; i++) {
    table[pairs[i][0]] = pairs[i][1];
  }

  // Loop-invariant patterns, hoisted out of the conversion loop.
  var STARTS_WITH_BMP = /^[BMP]/;
  var ENDS_WITH_LONG_VOWEL = /(AA|II|UU|EE|OO|OU)$/;

  /**
   * Converts `text` to romaji according to the rules described on
   * installHebon().
   *
   * @param {*} text - value to convert; coerced with String().
   * @returns {string} the converted text.
   */
  function toHebon(text) {
    var source = String(text);
    var len = source.length;
    var result = "";
    var pos = 0;
    // Conversion of the previous syllable; "" when there is none or when the
    // previous syllable was swallowed as a long vowel.
    var lastConvert = "";

    while (pos < len) {
      /* Table lookup: two characters first, then one, else pass through. */
      var pairHit = pos + 2 <= len ? table[source.substring(pos, pos + 2)] : undefined;
      var c;
      if (pairHit !== undefined) {
        c = pairHit;
        pos += 2;
      } else {
        var singleHit = table[source.charAt(pos)];
        c = singleHit !== undefined ? singleHit : source.charAt(pos);
        pos += 1;
      }

      /* Resolve a "っ" or "ん" that was deferred on the previous iteration. */
      if (lastConvert === SOKUON) {
        // "っ" before CH.. is written "T"; otherwise double the next letter.
        result += c.indexOf("CH") === 0 ? "T" : c.charAt(0);
      } else if (lastConvert === "N") {
        // "ん" is written "M" before B / M / P, "N" otherwise.
        result += STARTS_WITH_BMP.test(c) ? "M" : "N";
      }

      /* A syllable that merely lengthens the previous vowel is not written,
         and it is not remembered as the previous conversion either. */
      if (ENDS_WITH_LONG_VOWEL.test(lastConvert + c)) {
        lastConvert = "";
        continue;
      }

      // "っ" and "ん" depend on what follows, so they are only written
      // immediately when nothing follows.
      var isDeferred = c === SOKUON || c === "N";
      var isLastLetter = pos >= len;
      if (!isDeferred || isLastLetter) {
        result += c;
      }

      lastConvert = c;
    }

    return result;
  }

  // Kept for runtimes without a native startsWith; a no-op everywhere else.
  if (typeof String.prototype.startsWith !== "function") {
    String.prototype.startsWith = function (str) {
      return this.indexOf(str) === 0;
    };
  }

  String.prototype.toHebon = function () {
    return toHebon(this);
  };
}

installHebon();

: