EditPlus 2.0 或3.30
我們提供的服務有:成都網站建設、成都做網站、微信公眾號開發、網站優化、網站認證、肅南裕固族自治ssl等。為上千家企事業單位解決了網站和推廣的問題。提供周到的售前咨詢和貼心的售后服務,是有科學管理、有技術的肅南裕固族自治網站制作公司
在哪里都有下的。我一直都用這個,比記事本強多了
它對很多的語言的特殊字都標有顏色!
/**
 * Demo class that extracts, from an English text file, the words that start
 * or end with a given keyword.
 */
public class Test {

    /**
     * Runs the extraction against a hard-coded sample file and prints every
     * word starting with "h", separated by three spaces. Any failure is
     * reported via its stack trace.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Test t = new Test();
        File file = new File("E:\\桌面\\words.txt");
        try {
            List<String> list = t.getWords(file, true, "h");
            for (String string : list) {
                System.out.print(string + "   ");
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Extracts the words of a text file that begin or end with a keyword.
     *
     * <p>The file is decoded as UTF-8, echoed to standard output, and split
     * on runs of whitespace (spaces, tabs, line breaks). Empty tokens are
     * skipped.
     *
     * @param file   the source file
     * @param isHead {@code true} to match by prefix, {@code false} to match by suffix
     * @param fix    the prefix or suffix to look for
     * @return the matching words, in file order; empty if none match
     * @throws Exception if the file cannot be opened or read
     */
    public List<String> getWords(File file, boolean isHead, String fix) throws Exception {
        // Read the whole file into memory. try-with-resources closes every
        // stream even when reading fails, and decoding through a Reader keeps
        // multi-byte characters intact (casting single bytes to char does not).
        StringBuilder content = new StringBuilder();
        try (FileInputStream fis = new FileInputStream(file);
             BufferedInputStream bis = new BufferedInputStream(fis);
             java.io.Reader reader = new java.io.InputStreamReader(
                     bis, java.nio.charset.StandardCharsets.UTF_8)) {
            int c;
            while ((c = reader.read()) != -1) {
                content.append((char) c);
            }
        }
        String str = content.toString();
        System.out.println(str);

        // Split into words on any whitespace so that words separated by line
        // breaks or tabs are not glued together.
        List<String> list = new ArrayList<String>();
        for (String word : str.split("\\s+")) {
            if (word.isEmpty()) {
                continue; // produced by leading whitespace
            }
            boolean matches = isHead ? word.startsWith(fix) : word.endsWith(fix);
            if (matches) {
                list.add(word);
            }
        }
        return list;
    }
}
Java中判斷字符串的編碼有兩種思路:
一種是根據byte的長度判斷,英文的字母、數字和標點符號都是一個byte,且值在0-255之間
另一種是根據中文的Unicode取值范圍判斷,這個就是把所有的范圍都包含,才能判斷正確,參考unicode中文范圍:
示例代碼:
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Demonstrates three ways of deciding whether text is "Chinese" or "English":
 * by single-byte (ASCII) range, by {@link Character.UnicodeBlock}, and by
 * regular expressions over Unicode ranges.
 */
public class StringTest {

    /**
     * One pre-compiled character class covering the ranges treated as Chinese
     * by {@link #isChinese(String)}: U+4E00-U+9FA5, U+FF00-U+FFEF,
     * U+2E80-U+2EFF, U+3000-U+303F and U+31C0-U+31EF.
     */
    private static final Pattern CHINESE_RANGES = Pattern.compile(
            "[\\u4e00-\\u9fa5\\uFF00-\\uFFEF\\u2E80-\\u2EFF\\u3000-\\u303F\\u31C0-\\u31EF]");

    /**
     * Tells whether a character is a single-byte (ASCII) character.
     *
     * <p>The range is checked directly instead of encoding the character with
     * the platform default charset, so the answer does not depend on the
     * JVM's or operating system's configuration.
     *
     * @param ch the character to classify
     * @return {@code true} for ASCII ("English"), {@code false} otherwise ("Chinese")
     */
    public static boolean checkChar(char ch) {
        return ch <= 0x7F;
    }

    /**
     * Classifies a string using {@link #checkChar(char)}.
     *
     * @param str the text to classify; may be {@code null}
     * @return "中文" if any character is non-ASCII, "英文" if all characters
     *         are ASCII, or the empty string for {@code null} or empty input
     */
    public static String checkString(String str) {
        String res = "";
        if (str != null) {
            for (int i = 0; i < str.length(); i++) {
                // A single non-ASCII character makes the whole string Chinese.
                if (!checkChar(str.charAt(i))) {
                    res = "中文";
                    break;
                } else {
                    res = "英文";
                }
            }
        }
        return res;
    }

    /**
     * Tells whether a character belongs to one of the Unicode blocks this
     * class treats as Chinese (CJK ideographs, CJK and general punctuation,
     * half-width and full-width forms).
     *
     * @param c the character to classify
     * @return {@code true} if the character's block is one of the listed blocks
     */
    public static boolean isChinese(char c) {
        Character.UnicodeBlock ub = Character.UnicodeBlock.of(c);
        return ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
                || ub == Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
                || ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
                || ub == Character.UnicodeBlock.GENERAL_PUNCTUATION
                || ub == Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
                || ub == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS;
    }

    /**
     * Tells whether a string consists solely of the letters a-z and A-Z.
     *
     * @param charaString the text to test
     * @return {@code true} if every character is an ASCII letter; the empty
     *         string also yields {@code true}
     */
    public static boolean isEnglish(String charaString) {
        return charaString.matches("^[a-zA-Z]*");
    }

    /**
     * Classifies a string by searching for any character in the ranges of
     * {@link #CHINESE_RANGES}. The U+4E00-U+9FA5 range alone is incomplete,
     * hence the additional ranges.
     *
     * @param str the text to classify
     * @return "中文" if at least one character falls in those ranges, otherwise "英文"
     */
    public static String isChinese(String str) {
        if (CHINESE_RANGES.matcher(str).find()) {
            return "中文";
        } else {
            return "英文";
        }
    }

    /**
     * Prints the classification of a few sample strings with each approach.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        System.out.println("使用長度判斷:");
        System.out.println(checkString("Hello++"));
        System.out.println(checkString("Hello++。、,?"));
        System.out.println(checkString("Hello++編程"));
        System.out.println(checkString("編程"));
        System.out.println("\r\n使用正則表達(dá)式判斷:");
        System.out.println(isChinese("Hello++"));
        System.out.println(isChinese("Hello++。、,?"));
        System.out.println(isChinese("Hello++編程"));
        System.out.println(isChinese("編程"));
        System.out.println("\r\n使用Character.UnicodeBlock");
        System.out.println(isChinese('h')?"中文":"英文");
        System.out.println(isChinese(',')?"中文":"英文");
        System.out.println(isChinese('。')?"中文":"英文");
        System.out.println(isChinese('編')?"中文":"英文");
    }
}