一、公司同事整理的类,挺实用的。相信拿出来分享一下他不会介意的 O(∩_∩)O。不过如果字符串的首字符是数字或英文字母,结果会有些问题。
复制代码 代码如下:
/**
 * Helper_Spell - utility for extracting the pinyin initials of Chinese text.
 *
 * Every Chinese character is reduced to the first letter of its pinyin by
 * looking up its two-byte GB code in a table of range boundaries. ASCII
 * characters are passed through.
 *
 * @category Helper
 * @package Helper_Spell
 * @author Lancer <lancer.he@gmail.com>
 * @version 1.0
 * @see Translation_Big2gb
 */
class Helper_Spell {
    /**
     * Range boundaries for the pinyin initials.
     *
     * Each key is (lead byte * 1000 + trail byte) of the first character
     * whose pinyin starts with the mapped letter. The keys must stay in
     * ascending order because _search() runs a binary search over them.
     * The final entry is the upper bound of the searchable range.
     *
     * @var array
     * @access private
     */
    private $_pinyins = array(
        176161 => "A",
        176197 => "B",
        178193 => "C",
        180238 => "D",
        182234 => "E",
        183162 => "F",
        184193 => "G",
        185254 => "H",
        187247 => "J",
        191166 => "K",
        192172 => "L",
        194232 => "M",
        196195 => "N",
        197182 => "O",
        197190 => "P",
        198218 => "Q",
        200187 => "R",
        200246 => "S",
        203250 => "T",
        205218 => "W",
        206244 => "X",
        209185 => "Y",
        212209 => "Z",
        215249 => "Z",
    );

    /**
     * Encoding of the strings handed to this instance.
     *
     * @var string
     * @access private
     */
    private $_charset = null;

    /**
     * Constructor: selects the input encoding.
     *
     * @param string $charset "utf-8" (default) or "gb2312"
     */
    public function __construct( $charset = "utf-8" ) {
        $this->_charset = $charset;
    }

    /**
     * Returns the first A-Z letter found in the initials of $str.
     *
     * The result of getInitials() is upper-cased before scanning, so a plain
     * lowercase ASCII string such as "abc" yields "A" rather than "*".
     *
     * @access public
     * @static
     * @param string $str
     * @param string $charset encoding of $str, forwarded to getInitials()
     * @return string a single upper-case letter, or "*" when none is found
     * @example Helper_Spell::getInitialsFirst("我的爱"); => W
     */
    public static function getInitialsFirst( $str, $charset = "utf-8" ) {
        $string = strtoupper( self::getInitials( $str, $charset ) );
        if ( preg_match( '/[A-Z]/', $string, $match ) ) {
            return $match[0];
        }
        return "*";
    }

    /**
     * Returns the pinyin initials of every character in $str.
     *
     * @access public
     * @static
     * @param string $str
     * @param string $charset encoding of $str
     * @return string
     * @example Helper_Spell::getInitials("我的爱"); => WDA
     */
    public static function getInitials( $str, $charset = "utf-8" ) {
        $instance = new Helper_Spell( $charset );
        return $instance->_getInitials( $str );
    }

    /**
     * Builds the initials string.
     *
     *  - pure ASCII input (letters, digits, ...) is returned unchanged,
     *    e.g. abc123 => abc123
     *  - Chinese input yields the pinyin initials, e.g. 王小明 => WXM
     *  - mixed input yields initials plus upper-cased ASCII,
     *    e.g. 我i我j => WIWJ
     *
     * Characters whose code is outside the lookup table are dropped.
     *
     * @access private
     * @param string $str
     * @param bool $translation run the text through Translation_Big2gb first
     * @return string
     */
    private function _getInitials( $str, $translation = TRUE ) {
        // Explicit checks instead of empty(): the string "0" is valid input.
        if ( $str === null || $str === false || $str === "" ) {
            return "";
        }
        $str = (string) $str;

        // Only a string made entirely of ASCII bytes is passed through as-is.
        if ( $this->_isAsciis( $str ) ) {
            return $str;
        }

        if ( $translation ) {
            $str = Translation_Big2gb::big2gb( $str );
        }

        if ( strcasecmp( (string) $this->_charset, "utf-8" ) === 0 ) {
            // //IGNORE asks iconv() to skip characters it cannot convert
            // instead of aborting.
            $str = iconv( "utf-8", "gbk//IGNORE", $str );
            if ( $str === false ) {
                return "";
            }
        }

        $result = array();
        foreach ( $this->_cutWord( $str ) as $word ) {
            if ( $this->_isAscii( $word ) ) { // not a Chinese character
                $result[] = $word;
                continue;
            }
            $code = ord( substr( $word, 0, 1 ) ) * 1000 + ord( substr( $word, 1, 1 ) );
            // Map the code to its pinyin initial (A-Z).
            $key = $this->_search( $code );
            if ( $key !== -1 ) {
                $result[] = $this->_pinyins[$key];
            }
        }
        return strtoupper( implode( "", $result ) );
    }

    /**
     * Extracts a run of characters from a double-byte encoded string.
     *
     * $start and $len are doubled to obtain byte positions; a byte above 129
     * is treated as the start of a two-byte character.
     *
     * @access private
     * @param string $str
     * @param int $start
     * @param int $len
     * @return string
     */
    private function _msubstr( $str, $start, $len ) {
        $start  = $start * 2;
        $len    = $len * 2;
        $strlen = strlen( $str );
        $result = "";
        for ( $i = 0; $i < $strlen; $i++ ) {
            $isDoubleByte = ord( substr( $str, $i, 1 ) ) > 129;
            if ( $i >= $start && $i < ( $start + $len ) ) {
                $result .= substr( $str, $i, $isDoubleByte ? 2 : 1 );
            }
            if ( $isDoubleByte ) {
                $i++; // skip the trail byte
            }
        }
        return $result;
    }

    /**
     * Splits a string into an array of units: one Chinese character or one
     * single-byte character per element.
     *
     * @access private
     * @param string $str
     * @return array
     */
    private function _cutWord( $str ) {
        $words = array();
        while ( $str != "" ) {
            if ( $this->_isAscii( $str ) ) { // not a Chinese character
                $word = $str[0];
            } else {
                $word = $this->_msubstr( $str, 0, 1 );
            }
            $words[] = $word;
            $str     = substr( $str, strlen( $word ) );
        }
        return $words;
    }

    /**
     * Tells whether the first byte of $char counts as a single-byte
     * character (byte value below 160).
     *
     * @access private
     * @param string $char
     * @return bool
     */
    private function _isAscii( $char ) {
        return ( ord( substr( $char, 0, 1 ) ) < 160 );
    }

    /**
     * Tells whether every byte of $str passes _isAscii().
     *
     * @access private
     * @param string $str
     * @return bool
     */
    private function _isAsciis( $str ) {
        $length = strlen( $str );
        for ( $i = 0; $i < $length; $i++ ) {
            if ( !$this->_isAscii( $str[$i] ) ) {
                return false;
            }
        }
        return true;
    }

    /**
     * Maps an ASCII code to a digit or an upper-case letter.
     *
     * Not referenced by the other methods of this class.
     *
     * @access private
     * @param int $ascii
     * @return string the digit, the upper-cased letter, or "~" for anything else
     */
    private function _getChar( $ascii ) {
        if ( $ascii >= 48 && $ascii <= 57 ) {
            return chr( $ascii ); // digit
        } elseif ( $ascii >= 65 && $ascii <= 90 ) {
            return chr( $ascii ); // A-Z
        } elseif ( $ascii >= 97 && $ascii <= 122 ) {
            return chr( $ascii - 32 ); // a-z, upper-cased
        } else {
            return "~"; // anything else
        }
    }

    /**
     * Binary search for the range of $_pinyins that contains $code.
     *
     * @access private
     * @param int $code lead byte * 1000 + trail byte of a GB-encoded character
     * @return int the matching key of $_pinyins, or -1 when $code lies
     *             outside the table
     */
    private function _search( $code ) {
        $data  = array_keys( $this->_pinyins );
        $lower = 0;
        $upper = count( $data ) - 1;

        // Reject codes outside the table (anything but level-1 characters).
        if ( $code < $data[0] || $code > $data[$upper] ) {
            return -1;
        }

        while ( $lower <= $upper ) {
            $middle = (int) ( ( $lower + $upper ) / 2 );
            if ( $data[$middle] < $code ) {
                $lower = $middle + 1;
            } elseif ( $data[$middle] > $code ) {
                $upper = $middle - 1;
            } else {
                return $data[$middle];
            }
        }

        // No exact hit: the code belongs to the closest boundary below it.
        return $data[$lower - 1];
    }
}
二、用来得到中文的首字母
下面这个 charset 类用来把中文(GBK 编码)字符串转换为拼音首字母:
复制代码 代码如下:
<?php
/**
 * Converts GBK-encoded Chinese text to pinyin initials.
 *
 * Each double-byte character is turned into a signed code
 * (lead * 256 + trail - 65536) and looked up in a table of pinyin
 * syllables; only the first letter of the matching syllable is returned.
 */
class charset
{
    /**
     * Pinyin syllable table: array(syllable, first code of the syllable),
     * sorted by ascending code. A syllable covers every code from its own
     * entry up to the code just before the next entry.
     *
     * @var array
     */
    private $_code = array(
        array("a", -20319),
        array("ai", -20317),
        array("an", -20304),
        array("ang", -20295),
        array("ao", -20292),
        array("ba", -20283),
        array("bai", -20265),
        array("ban", -20257),
        array("bang", -20242),
        array("bao", -20230),
        array("bei", -20051),
        array("ben", -20036),
        array("beng", -20032),
        array("bi", -20026),
        array("bian", -20002),
        array("biao", -19990),
        array("bie", -19986),
        array("bin", -19982),
        array("bing", -19976),
        array("bo", -19805),
        array("bu", -19784),
        array("ca", -19775),
        array("cai", -19774),
        array("can", -19763),
        array("cang", -19756),
        array("cao", -19751),
        array("ce", -19746),
        array("ceng", -19741),
        array("cha", -19739),
        array("chai", -19728),
        array("chan", -19725),
        array("chang", -19715),
        array("chao", -19540),
        array("che", -19531),
        array("chen", -19525),
        array("cheng", -19515),
        array("chi", -19500),
        array("chong", -19484),
        array("chou", -19479),
        array("chu", -19467),
        array("chuai", -19289),
        array("chuan", -19288),
        array("chuang", -19281),
        array("chui", -19275),
        array("chun", -19270),
        array("chuo", -19263),
        array("ci", -19261),
        array("cong", -19249),
        array("cou", -19243),
        array("cu", -19242),
        array("cuan", -19238),
        array("cui", -19235),
        array("cun", -19227),
        array("cuo", -19224),
        array("da", -19218),
        array("dai", -19212),
        array("dan", -19038),
        array("dang", -19023),
        array("dao", -19018),
        array("de", -19006),
        array("deng", -19003),
        array("di", -18996),
        array("dian", -18977),
        array("diao", -18961),
        array("die", -18952),
        array("ding", -18783),
        array("diu", -18774),
        array("dong", -18773),
        array("dou", -18763),
        array("du", -18756),
        array("duan", -18741),
        array("dui", -18735),
        array("dun", -18731),
        array("duo", -18722),
        array("e", -18710),
        array("en", -18697),
        array("er", -18696),
        array("fa", -18526),
        array("fan", -18518),
        array("fang", -18501),
        array("fei", -18490),
        array("fen", -18478),
        array("feng", -18463),
        array("fo", -18448),
        array("fou", -18447),
        array("fu", -18446),
        array("ga", -18239),
        array("gai", -18237),
        array("gan", -18231),
        array("gang", -18220),
        array("gao", -18211),
        array("ge", -18201),
        array("gei", -18184),
        array("gen", -18183),
        array("geng", -18181),
        array("gong", -18012),
        array("gou", -17997),
        array("gu", -17988),
        array("gua", -17970),
        array("guai", -17964),
        array("guan", -17961),
        array("guang", -17950),
        array("gui", -17947),
        array("gun", -17931),
        array("guo", -17928),
        array("ha", -17922),
        array("hai", -17759),
        array("han", -17752),
        array("hang", -17733),
        array("hao", -17730),
        array("he", -17721),
        array("hei", -17703),
        array("hen", -17701),
        array("heng", -17697),
        array("hong", -17692),
        array("hou", -17683),
        array("hu", -17676),
        array("hua", -17496),
        array("huai", -17487),
        array("huan", -17482),
        array("huang", -17468),
        array("hui", -17454),
        array("hun", -17433),
        array("huo", -17427),
        array("ji", -17417),
        array("jia", -17202),
        array("jian", -17185),
        array("jiang", -16983),
        array("jiao", -16970),
        array("jie", -16942),
        array("jin", -16915),
        array("jing", -16733),
        array("jiong", -16708),
        array("jiu", -16706),
        array("ju", -16689),
        array("juan", -16664),
        array("jue", -16657),
        array("jun", -16647),
        array("ka", -16474),
        array("kai", -16470),
        array("kan", -16465),
        array("kang", -16459),
        array("kao", -16452),
        array("ke", -16448),
        array("ken", -16433),
        array("keng", -16429),
        array("kong", -16427),
        array("kou", -16423),
        array("ku", -16419),
        array("kua", -16412),
        array("kuai", -16407),
        array("kuan", -16403),
        array("kuang", -16401),
        array("kui", -16393),
        array("kun", -16220),
        array("kuo", -16216),
        array("la", -16212),
        array("lai", -16205),
        array("lan", -16202),
        array("lang", -16187),
        array("lao", -16180),
        array("le", -16171),
        array("lei", -16169),
        array("leng", -16158),
        array("li", -16155),
        array("lia", -15959),
        array("lian", -15958),
        array("liang", -15944),
        array("liao", -15933),
        array("lie", -15920),
        array("lin", -15915),
        array("ling", -15903),
        array("liu", -15889),
        array("long", -15878),
        array("lou", -15707),
        array("lu", -15701),
        array("lv", -15681),
        array("luan", -15667),
        array("lue", -15661),
        array("lun", -15659),
        array("luo", -15652),
        array("ma", -15640),
        array("mai", -15631),
        array("man", -15625),
        array("mang", -15454),
        array("mao", -15448),
        array("me", -15436),
        array("mei", -15435),
        array("men", -15419),
        array("meng", -15416),
        array("mi", -15408),
        array("mian", -15394),
        array("miao", -15385),
        array("mie", -15377),
        array("min", -15375),
        array("ming", -15369),
        array("miu", -15363),
        array("mo", -15362),
        array("mou", -15183),
        array("mu", -15180),
        array("na", -15165),
        array("nai", -15158),
        array("nan", -15153),
        array("nang", -15150),
        array("nao", -15149),
        array("ne", -15144),
        array("nei", -15143),
        array("nen", -15141),
        array("neng", -15140),
        array("ni", -15139),
        array("nian", -15128),
        array("niang", -15121),
        array("niao", -15119),
        array("nie", -15117),
        array("nin", -15110),
        array("ning", -15109),
        array("niu", -14941),
        array("nong", -14937),
        array("nu", -14933),
        array("nv", -14930),
        array("nuan", -14929),
        array("nue", -14928),
        array("nuo", -14926),
        array("o", -14922),
        array("ou", -14921),
        array("pa", -14914),
        array("pai", -14908),
        array("pan", -14902),
        array("pang", -14894),
        array("pao", -14889),
        array("pei", -14882),
        array("pen", -14873),
        array("peng", -14871),
        array("pi", -14857),
        array("pian", -14678),
        array("piao", -14674),
        array("pie", -14670),
        array("pin", -14668),
        array("ping", -14663),
        array("po", -14654),
        array("pu", -14645),
        array("qi", -14630),
        array("qia", -14594),
        array("qian", -14429),
        array("qiang", -14407),
        array("qiao", -14399),
        array("qie", -14384),
        array("qin", -14379),
        array("qing", -14368),
        array("qiong", -14355),
        array("qiu", -14353),
        array("qu", -14345),
        array("quan", -14170),
        array("que", -14159),
        array("qun", -14151),
        array("ran", -14149),
        array("rang", -14145),
        array("rao", -14140),
        array("re", -14137),
        array("ren", -14135),
        array("reng", -14125),
        array("ri", -14123),
        array("rong", -14122),
        array("rou", -14112),
        array("ru", -14109),
        array("ruan", -14099),
        array("rui", -14097),
        array("run", -14094),
        array("ruo", -14092),
        array("sa", -14090),
        array("sai", -14087),
        array("san", -14083),
        array("sang", -13917),
        array("sao", -13914),
        array("se", -13910),
        array("sen", -13907),
        array("seng", -13906),
        array("sha", -13905),
        array("shai", -13896),
        array("shan", -13894),
        array("shang", -13878),
        array("shao", -13870),
        array("she", -13859),
        array("shen", -13847),
        array("sheng", -13831),
        array("shi", -13658),
        array("shou", -13611),
        array("shu", -13601),
        array("shua", -13406),
        array("shuai", -13404),
        array("shuan", -13400),
        array("shuang", -13398),
        array("shui", -13395),
        array("shun", -13391),
        array("shuo", -13387),
        array("si", -13383),
        array("song", -13367),
        array("sou", -13359),
        array("su", -13356),
        array("suan", -13343),
        array("sui", -13340),
        array("sun", -13329),
        array("suo", -13326),
        array("ta", -13318),
        array("tai", -13147),
        array("tan", -13138),
        array("tang", -13120),
        array("tao", -13107),
        array("te", -13096),
        array("teng", -13095),
        array("ti", -13091),
        array("tian", -13076),
        array("tiao", -13068),
        array("tie", -13063),
        array("ting", -13060),
        array("tong", -12888),
        array("tou", -12875),
        array("tu", -12871),
        array("tuan", -12860),
        array("tui", -12858),
        array("tun", -12852),
        array("tuo", -12849),
        array("wa", -12838),
        array("wai", -12831),
        array("wan", -12829),
        array("wang", -12812),
        array("wei", -12802),
        array("wen", -12607),
        array("weng", -12597),
        array("wo", -12594),
        array("wu", -12585),
        array("xi", -12556),
        array("xia", -12359),
        array("xian", -12346),
        array("xiang", -12320),
        array("xiao", -12300),
        array("xie", -12120),
        array("xin", -12099),
        array("xing", -12089),
        array("xiong", -12074),
        array("xiu", -12067),
        array("xu", -12058),
        array("xuan", -12039),
        array("xue", -11867),
        array("xun", -11861),
        array("ya", -11847),
        array("yan", -11831),
        array("yang", -11798),
        array("yao", -11781),
        array("ye", -11604),
        array("yi", -11589),
        array("yin", -11536),
        array("ying", -11358),
        array("yo", -11340),
        array("yong", -11339),
        array("you", -11324),
        array("yu", -11303),
        array("yuan", -11097),
        array("yue", -11077),
        array("yun", -11067),
        array("za", -11055),
        array("zai", -11052),
        array("zan", -11045),
        array("zang", -11041),
        array("zao", -11038),
        array("ze", -11024),
        array("zei", -11020),
        array("zen", -11019),
        array("zeng", -11018),
        array("zha", -11014),
        array("zhai", -10838),
        array("zhan", -10832),
        array("zhang", -10815),
        array("zhao", -10800),
        array("zhe", -10790),
        array("zhen", -10780),
        array("zheng", -10764),
        array("zhi", -10587),
        array("zhong", -10544),
        array("zhou", -10533),
        array("zhu", -10519),
        array("zhua", -10331),
        array("zhuai", -10329),
        array("zhuan", -10328),
        array("zhuang", -10322),
        array("zhui", -10315),
        array("zhun", -10309),
        array("zhuo", -10307),
        array("zi", -10296),
        array("zong", -10281),
        array("zou", -10274),
        array("zu", -10270),
        array("zuan", -10262),
        array("zui", -10260),
        array("zun", -10256),
        array("zuo", -10254)
    );

    /**
     * Converts a GBK-encoded string to its pinyin initials.
     *
     * Single-byte characters are copied through; every Chinese character
     * found in the table is replaced by the lowercase first letter of its
     * pinyin; any other double-byte character is dropped.
     *
     * @param string $str GBK-encoded input
     * @return string
     */
    public function PinYin($str)
    {
        $ret    = "";
        $length = strlen($str);
        for ($i = 0; $i < $length; $i++) {
            $p = ord($str[$i]);
            // 0x81 and above starts a two-byte GBK character. Consuming the
            // trail byte here keeps it from being emitted as a stray ASCII
            // character when the lead byte is in 0x81-0xA0.
            if ($p > 0x80) {
                $i++;
                $q = ($i < $length) ? ord($str[$i]) : 0;
                $p = $p * 256 + $q - 65536;
            }
            $ret .= $this->convert($p);
        }
        return $ret;
    }

    /**
     * Converts one character code to its output text.
     *
     * @param int $num a single-byte code (1-159), or the signed code
     *                 lead * 256 + trail - 65536 of a double-byte character
     * @return string the character itself for single-byte codes, the first
     *                letter of the pinyin for codes covered by the table,
     *                and "" for everything else
     */
    public function convert($num)
    {
        if ($num > 0 && $num < 160) { // not a Chinese character: return it as-is
            return chr($num);
        }
        if ($num < -20319 || $num > -10247) { // outside the pinyin table
            return "";
        }
        // Walk backwards to the last syllable whose first code is <= $num.
        for ($i = count($this->_code) - 1; $i >= 0; $i--) {
            if ($this->_code[$i][1] <= $num) {
                return substr($this->_code[$i][0], 0, 1);
            }
        }
        return "";
    }
}
下面这个是用来测试代码的:
复制代码 代码如下:
<?php
include_once "charset.class.php";
// Declare the response as UTF-8 HTML before anything is echoed.
header("Content-type: text/html;charset=utf-8");
$charset=new charset();
foreach(array("武汉","中国","上海") as $val){
    // PinYin() is fed GBK bytes, so transcode the UTF-8 literal first.
    $gbk=mb_convert_encoding($val,"gbk","utf-8");
    $initials=strtoupper($charset->PinYin($gbk));
    // Convert the result back to UTF-8 to match the declared page charset.
    echo iconv("gbk","utf-8//IGNORE",$initials), "<br/>";
}
三、这是在网上找到的一个方法,经过测试可以正常使用,但对一些生僻字或者特殊字符会有问题。
复制代码 代码如下:
/**
 * Returns the upper-case first letter for a string: the letter itself when
 * the string starts with A-Z / a-z, otherwise the pinyin initial of the
 * leading Chinese character.
 *
 * The input is expected to be UTF-8. When it does not survive a
 * UTF-8 -> GB2312 -> UTF-8 round trip it is treated as already being
 * GB-encoded.
 *
 * @param string $str
 * @return string|null "" for empty input, a letter A-Z on success, or null
 *                     when the leading character has no entry in the table
 */
function getFirstCharter($str){
    if(empty($str)){return "";}
    $str=(string)$str;

    // Only real letters count; the characters between "Z" and "a" do not.
    $first=$str[0];
    if(($first>="A"&&$first<="Z")||($first>="a"&&$first<="z")){
        return strtoupper($first);
    }

    // Prefer the GB2312 conversion when it round-trips cleanly.
    $s=$str;
    $gb=iconv("UTF-8","gb2312",$str);
    if($gb!==false&&iconv("gb2312","UTF-8",$gb)===$str){
        $s=$gb;
    }

    // A Chinese character needs two bytes; anything shorter cannot match.
    if(strlen($s)<2){return null;}
    $asc=ord($s[0])*256+ord($s[1])-65536;
    if($asc<-20319||$asc>-10247){return null;}

    // First code of each initial, ascending. A letter covers every code up
    // to the one just before the next entry; "Z" ends at -10247.
    $bounds=array(
        -20319=>"A", -20283=>"B", -19775=>"C", -19218=>"D",
        -18710=>"E", -18526=>"F", -18239=>"G", -17922=>"H",
        -17417=>"J", -16474=>"K", -16212=>"L", -15640=>"M",
        -15165=>"N", -14922=>"O", -14914=>"P", -14630=>"Q",
        -14149=>"R", -14090=>"S", -13318=>"T", -12838=>"W",
        -12556=>"X", -11847=>"Y", -11055=>"Z",
    );
    $letter=null;
    foreach($bounds as $lower=>$initial){
        if($asc<$lower){break;}
        $letter=$initial;
    }
    return $letter;
}