MainClass.java
package com.pkm; import java.util.regex.Matcher; import java.util.regex.Pattern; public class MainClass { public static void main(String[] args) { String str1 = "(=>THAI(คุณ), HINDI:(तुम मेरी हो), HANGERIO:(تو مال منی), CHINA:(您), ARBI(أنت), FARSI(شما)"; System.out.println("\n\nOriginal :: " +str1); Pattern pattern = Pattern.compile("([^\\x00-\\x7F])|([^A-Za-z0-9-_])"); StringBuffer output = new StringBuffer(); Matcher matcher = pattern.matcher(str1); while (matcher.find()) { String mString = matcher.group(0); String rep = String.format("[%d \\%s = %d]", mString.length(), mString, (int) mString.charAt(0)); String rep2 = String.format("&#%d;", (int) mString.charAt(0)); matcher.appendReplacement(output, rep2); } matcher.appendTail(output); System.out.println("Output :: " + output.toString()); pattern = Pattern.compile("\\&\\#\\d{2,}\\;"); matcher = pattern.matcher(output.toString()); output = new StringBuffer(); while (matcher.find()) { String mString = matcher.group(0); mString = mString.substring(2); mString = mString.substring(0, mString.length() - 1); mString = Character.toString((char) Integer.parseInt(mString)); matcher.appendReplacement(output, mString); } matcher.appendTail(output); System.out.println("Output :: " + output.toString()); } }
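The program prints the following output. (Note: the first "Output ::" line printed by the code consists of numeric character references such as &#40; for "("; it appears identical to the original below presumably because the references were rendered back into characters when this output was pasted.)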
Original :: (=>THAI(คุณ), HINDI:(तुम मेरी हो), HANGERIO:(تو مال منی), CHINA:(您), ARBI(أنت), FARSI(شما) Output :: (=>THAI(คุณ), HINDI:(तुम मेरी हो), HANGERIO:(تو مال منی), CHINA:(您), ARBI(أنت), FARSI(شما) Output :: (=>THAI(คุณ), HINDI:(तुम मेरी हो), HANGERIO:(تو مال منی), CHINA:(您), ARBI(أنت), FARSI(شما)