优化：预览URL特殊字符使用Hutool解决方案

01e310a6 · 陈精华 · kl · a07c9628 · 01e310a6 · 01e310a6
Commit 01e310a6 authored Nov 29, 2019 by 陈精华 Committed by kl Nov 29, 2019
5 changed files
--- a/jodconverter-web/src/main/java/cn/keking/hutool/HexUtil.java
+++ b/jodconverter-web/src/main/java/cn/keking/hutool/HexUtil.java
--- a/jodconverter-web/src/main/java/cn/keking/hutool/StrUtil.java
+++ b/jodconverter-web/src/main/java/cn/keking/hutool/StrUtil.java
+package cn.keking.hutool;
+import java.nio.charset.Charset;
+/**
+ * String utilities: null-safe blank checks, charset conversion, substring and character search.
+ *
+ * @author xiaoleilu
+ *
+ */
+public class StrUtil {
+	/** The empty string {@code ""}. */
+	public static final String EMPTY = "";
+	/** Result of the {@code indexOf} methods when the character is not found. */
+	private static final int INDEX_NOT_FOUND = -1;
+	/**
+	 * Tests whether a code point is a blank character.<br>
+	 * A code point is blank when {@link Character#isWhitespace(int)} or
+	 * {@link Character#isSpaceChar(int)} accepts it (space, tab, full-width space,
+	 * no-break space, ...) or when it is {@code U+FEFF} or {@code U+202A}.
+	 *
+	 * @see Character#isWhitespace(int)
+	 * @see Character#isSpaceChar(int)
+	 * @param c the code point to test
+	 * @return {@code true} if the code point is blank
+	 * @since 4.0.10
+	 */
+	public static boolean isBlankChar(int c) {
+		return Character.isWhitespace(c) || Character.isSpaceChar(c) || c == '\ufeff' || c == '\u202a';
+	}
+	/**
+	 * Tests whether a character is a blank character.<br>
+	 * Delegates to {@link #isBlankChar(int)}.
+	 *
+	 * @param c the character to test
+	 * @return {@code true} if the character is blank
+	 * @see Character#isWhitespace(int)
+	 * @see Character#isSpaceChar(int)
+	 * @since 4.0.10
+	 */
+	public static boolean isBlankChar(char c) {
+		return isBlankChar((int) c);
+	}
+	/**
+	 * Tests whether a character sequence is blank. A sequence is blank when it is:<br>
+	 * 1. {@code null}<br>
+	 * 2. made up only of blank characters (see {@link #isBlankChar(char)})<br>
+	 * 3. {@code ""}<br>
+	 *
+	 * @param str the sequence to test, may be {@code null}
+	 * @return {@code true} if the sequence is blank
+	 */
+	public static boolean isBlank(CharSequence str) {
+		if (str == null) {
+			return true;
+		}
+		final int length = str.length();
+		for (int i = 0; i < length; i++) {
+			// a single non-blank character makes the whole sequence non-blank
+			if (!isBlankChar(str.charAt(i))) {
+				return false;
+			}
+		}
+		return true;
+	}
+	/**
+	 * Tests whether a character sequence is empty. A sequence is empty when it is:<br>
+	 * 1. {@code null}<br>
+	 * 2. {@code ""}<br>
+	 *
+	 * @param str the sequence to test, may be {@code null}
+	 * @return {@code true} if the sequence is {@code null} or has length 0
+	 */
+	public static boolean isEmpty(CharSequence str) {
+		return str == null || str.length() == 0;
+	}
+	/**
+	 * Encodes a character sequence into bytes.
+	 *
+	 * @param str the sequence to encode, may be {@code null}
+	 * @param charset the charset to use; when {@code null} the platform default charset is used
+	 * @return the encoded bytes, or {@code null} when {@code str} is {@code null}
+	 */
+	public static byte[] bytes(CharSequence str, Charset charset) {
+		if (str == null) {
+			return null;
+		}
+		if (null == charset) {
+			return str.toString().getBytes();
+		}
+		return str.toString().getBytes(charset);
+	}
+	/**
+	 * Converts a {@link CharSequence} to a string, null-safe.
+	 *
+	 * @param cs the {@link CharSequence}, may be {@code null}
+	 * @return {@code cs.toString()}, or {@code null} when {@code cs} is {@code null}
+	 */
+	public static String str(CharSequence cs) {
+		return null == cs ? null : cs.toString();
+	}
+	/**
+	 * Decodes bytes into a string.
+	 *
+	 * @param data the bytes to decode, may be {@code null}
+	 * @param charset the charset to use; when {@code null} the platform default charset is used
+	 * @return the decoded string, or {@code null} when {@code data} is {@code null}
+	 */
+	public static String str(byte[] data, Charset charset) {
+		if (data == null) {
+			return null;
+		}
+		if (null == charset) {
+			return new String(data);
+		}
+		return new String(data, charset);
+	}
+	/**
+	 * A lenient replacement for the JDK {@code substring}.<br>
+	 * Indexes start at 0 and the last character is -1.<br>
+	 * If from and to are at the same position, {@code ""} is returned.<br>
+	 * A negative from or to is counted backwards from the end; if its absolute value is larger than
+	 * the length, from becomes 0 and to becomes the length. An index larger than the length is
+	 * clamped to the length.<br>
+	 * If the corrected from is greater than the corrected to, the two are swapped. Examples:<br>
+	 * abcdefgh 2 3 =&gt; c <br>
+	 * abcdefgh 2 -3 =&gt; cde <br>
+	 *
+	 * @param str the source sequence; {@code null} yields {@code null} and {@code ""} yields {@code ""}
+	 * @param fromIndex start index (inclusive)
+	 * @param toIndex end index (exclusive)
+	 * @return the substring
+	 */
+	public static String sub(CharSequence str, int fromIndex, int toIndex) {
+		if (isEmpty(str)) {
+			return str(str);
+		}
+		int len = str.length();
+		if (fromIndex < 0) {
+			fromIndex = len + fromIndex;
+			if (fromIndex < 0) {
+				fromIndex = 0;
+			}
+		} else if (fromIndex > len) {
+			fromIndex = len;
+		}
+		if (toIndex < 0) {
+			toIndex = len + toIndex;
+			if (toIndex < 0) {
+				// documented contract: an out-of-range negative "to" means "up to the end"
+				toIndex = len;
+			}
+		} else if (toIndex > len) {
+			toIndex = len;
+		}
+		if (toIndex < fromIndex) {
+			int tmp = fromIndex;
+			fromIndex = toIndex;
+			toIndex = tmp;
+		}
+		if (fromIndex == toIndex) {
+			return EMPTY;
+		}
+		return str.toString().substring(fromIndex, toIndex);
+	}
+	/**
+	 * Returns the part of the string before the given position.
+	 *
+	 * @param string the source sequence
+	 * @param toIndex position to cut at (exclusive); interpreted as in {@link #sub(CharSequence, int, int)}
+	 * @return the leading part of the string
+	 */
+	public static String subPre(CharSequence string, int toIndex) {
+		return sub(string, 0, toIndex);
+	}
+	/**
+	 * Returns the part of the string from the given position to the end.<br>
+	 * Note: unlike {@link #subPre(CharSequence, int)}, an empty input ({@code null} or {@code ""})
+	 * yields {@code null}.
+	 *
+	 * @param string the source sequence
+	 * @param fromIndex position to start at (inclusive); interpreted as in {@link #sub(CharSequence, int, int)}
+	 * @return the trailing part of the string, or {@code null} when {@code string} is empty
+	 */
+	public static String subSuf(CharSequence string, int fromIndex) {
+		if (isEmpty(string)) {
+			return null;
+		}
+		return sub(string, fromIndex, string.length());
+	}
+	/**
+	 * Finds the first occurrence of a character inside a range.
+	 *
+	 * @param str the sequence to search; {@code null} or empty yields -1
+	 * @param searchChar the character to look for
+	 * @param start start position (inclusive); a negative value searches from 0, a value at or
+	 *        beyond the length finds nothing (same as {@link String#indexOf(int, int)})
+	 * @param end end position (exclusive); a negative value or a value larger than the length
+	 *        searches up to the end of the sequence
+	 * @return the index of the character, or -1 when it is not found
+	 */
+	public static int indexOf(final CharSequence str, char searchChar, int start, int end) {
+		if (isEmpty(str)) {
+			return INDEX_NOT_FOUND;
+		}
+		final int len = str.length();
+		if (start < 0) {
+			start = 0;
+		}
+		if (end > len || end < 0) {
+			end = len;
+		}
+		// when start >= end (including start beyond the length) the loop body never runs
+		for (int i = start; i < end; i++) {
+			if (str.charAt(i) == searchChar) {
+				return i;
+			}
+		}
+		return INDEX_NOT_FOUND;
+	}
+	/**
+	 * Finds the first occurrence of a character at or after a start position.
+	 *
+	 * @param str the sequence to search; {@code null} yields -1
+	 * @param searchChar the character to look for
+	 * @param start start position (inclusive); a negative value searches from 0
+	 * @return the index of the character, or -1 when it is not found
+	 */
+	public static int indexOf(final CharSequence str, char searchChar, int start) {
+		if (str instanceof String) {
+			return ((String) str).indexOf(searchChar, start);
+		}
+		return indexOf(str, searchChar, start, -1);
+	}
+	/**
+	 * Finds the first occurrence of a character in the whole sequence.
+	 *
+	 * @param str the sequence to search; {@code null} yields -1
+	 * @param searchChar the character to look for
+	 * @return the index of the character, or -1 when it is not found
+	 */
+	public static int indexOf(final CharSequence str, char searchChar) {
+		return indexOf(str, searchChar, 0);
+	}
+	/**
+	 * Returns the given default when the string is <code>null</code>, otherwise the string itself.
+	 *
+	 * <pre>
+	 * nullToDefault(null, &quot;default&quot;)  = &quot;default&quot;
+	 * nullToDefault(&quot;&quot;, &quot;default&quot;)    = &quot;&quot;
+	 * nullToDefault(&quot;  &quot;, &quot;default&quot;)  = &quot;  &quot;
+	 * nullToDefault(&quot;bat&quot;, &quot;default&quot;) = &quot;bat&quot;
+	 * </pre>
+	 *
+	 * @param str the string to convert
+	 * @param defaultStr the default string
+	 *
+	 * @return the string itself, or the default when it is {@code null}
+	 */
+	public static String nullToDefault(CharSequence str, String defaultStr) {
+		return (str == null) ? defaultStr : str.toString();
+	}
+	/**
+	 * Converts a {@code null} string to {@link #EMPTY}.
+	 *
+	 * @param str the string to convert
+	 * @return the string itself, or {@code ""} when it is {@code null}
+	 */
+	public static String nullToEmpty(CharSequence str) {
+		return nullToDefault(str, EMPTY);
+	}
+}
--- a/jodconverter-web/src/main/java/cn/keking/hutool/URLEncoder.java
+++ b/jodconverter-web/src/main/java/cn/keking/hutool/URLEncoder.java
+package cn.keking.hutool;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.io.Serializable;
+import java.nio.charset.Charset;
+import java.util.BitSet;
+/**
+ * Percent-encoder for URL text.
+ *
+ * <pre>
+ * 1. Characters registered as safe are copied unchanged; "a"-"z", "A"-"Z" and "0"-"9" are always safe;
+ * 2. A space is written as "+" when {@link #setEncodeSpaceAsPlus(boolean)} is enabled;
+ * 3. Every other character is converted to bytes in the requested charset and each byte is
+ *    written as "%" followed by the hex digits produced by {@code HexUtil.appendHex}.
+ * </pre>
+ *
+ * Instances are mutable: the shared {@link #DEFAULT} and {@link #QUERY} encoders can be altered
+ * through {@link #addSafeCharacter(char)}, {@link #removeSafeCharacter(char)} and
+ * {@link #setEncodeSpaceAsPlus(boolean)}, which affects every user of those constants.
+ *
+ * @author looly
+ *
+ */
+public class URLEncoder implements Serializable{
+	private static final long serialVersionUID = 1L;
+	// --------------------------------------------------------------------------------------------- Static method start
+	/**
+	 * Default {@link URLEncoder}<br>
+	 * The default encoder targets URI paths, defined as:
+	 *
+	 * <pre>
+	 * pchar = unreserved (left as-is) / pct-encoded / sub-delims / ":" / "@"
+	 * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
+	 * sub-delims = "!" / "$" / "&amp;" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
+	 * </pre>
+	 */
+	public static final URLEncoder DEFAULT = createDefault();
+	/**
+	 * {@link URLEncoder} for query strings<br>
+	 * The encoder targets form-style query strings, defined as:
+	 *
+	 * <pre>
+	 * 0x20 ' ' =&gt; '+'
+	 * 0x2A, 0x2D, 0x2E, 0x30 to 0x39, 0x41 to 0x5A, 0x5F, 0x61 to 0x7A as-is
+	 * '*', '-', '.', '0' to '9', 'A' to 'Z', '_', 'a' to 'z' Also '=' and '&amp;' are not encoded
+	 * everything else is encoded as %nn
+	 * </pre>
+	 *
+	 * See: https://www.w3.org/TR/html5/forms.html#application/x-www-form-urlencoded-encoding-algorithm
+	 */
+	public static final URLEncoder QUERY = createQuery();
+	/**
+	 * Creates a new default {@link URLEncoder}<br>
+	 * The default encoder targets URI paths, defined as:
+	 *
+	 * <pre>
+	 * pchar = unreserved (left as-is) / pct-encoded / sub-delims / ":" / "@"
+	 * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
+	 * sub-delims = "!" / "$" / "&amp;" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
+	 * </pre>
+	 *
+	 * In addition "/" is registered as safe so that path separators survive encoding.
+	 *
+	 * @return {@link URLEncoder}
+	 */
+	public static URLEncoder createDefault() {
+		final URLEncoder encoder = new URLEncoder();
+		encoder.addSafeCharacter('-');
+		encoder.addSafeCharacter('.');
+		encoder.addSafeCharacter('_');
+		encoder.addSafeCharacter('~');
+		// Add the sub-delims
+		encoder.addSafeCharacter('!');
+		encoder.addSafeCharacter('$');
+		encoder.addSafeCharacter('&');
+		encoder.addSafeCharacter('\'');
+		encoder.addSafeCharacter('(');
+		encoder.addSafeCharacter(')');
+		encoder.addSafeCharacter('*');
+		encoder.addSafeCharacter('+');
+		encoder.addSafeCharacter(',');
+		encoder.addSafeCharacter(';');
+		encoder.addSafeCharacter('=');
+		// Add the remaining literals
+		encoder.addSafeCharacter(':');
+		encoder.addSafeCharacter('@');
+		// Add '/' so it isn't encoded when we encode a path
+		encoder.addSafeCharacter('/');
+		return encoder;
+	}
+	/**
+	 * Creates a new {@link URLEncoder} for query strings<br>
+	 * The encoder targets form-style query strings, defined as:
+	 *
+	 * <pre>
+	 * 0x20 ' ' =&gt; '+'
+	 * 0x2A, 0x2D, 0x2E, 0x30 to 0x39, 0x41 to 0x5A, 0x5F, 0x61 to 0x7A as-is
+	 * '*', '-', '.', '0' to '9', 'A' to 'Z', '_', 'a' to 'z' Also '=' and '&amp;' are not encoded
+	 * everything else is encoded as %nn
+	 * </pre>
+	 *
+	 * See: https://www.w3.org/TR/html5/forms.html#application/x-www-form-urlencoded-encoding-algorithm
+	 *
+	 * @return {@link URLEncoder}
+	 */
+	public static URLEncoder createQuery() {
+		final URLEncoder encoder = new URLEncoder();
+		// Special encoding for space
+		encoder.setEncodeSpaceAsPlus(true);
+		// Alpha and digit are safe by default
+		// Add the other permitted characters
+		encoder.addSafeCharacter('*');
+		encoder.addSafeCharacter('-');
+		encoder.addSafeCharacter('.');
+		encoder.addSafeCharacter('_');
+		encoder.addSafeCharacter('=');
+		encoder.addSafeCharacter('&');
+		return encoder;
+	}
+	// --------------------------------------------------------------------------------------------- Static method end
+	/** Characters that are copied to the output without encoding. */
+	private final BitSet safeCharacters;
+	/** Whether a space is encoded as '+'. */
+	private boolean encodeSpaceAsPlus = false;
+	/**
+	 * Creates an encoder.<br>
+	 *
+	 * [a-zA-Z0-9] are never encoded by default.
+	 */
+	public URLEncoder() {
+		this(new BitSet(256));
+		// set the ranges on the BitSet directly instead of calling the overridable
+		// addSafeCharacter from a constructor (the end index of BitSet.set is exclusive)
+		safeCharacters.set('a', 'z' + 1);
+		safeCharacters.set('A', 'Z' + 1);
+		safeCharacters.set('0', '9' + 1);
+	}
+	/**
+	 * Creates an encoder backed by the given set.
+	 *
+	 * @param safeCharacters the safe characters; safe characters are not encoded
+	 */
+	private URLEncoder(BitSet safeCharacters) {
+		this.safeCharacters = safeCharacters;
+	}
+	/**
+	 * Adds a safe character.<br>
+	 * Safe characters are not encoded.
+	 *
+	 * @param c the character
+	 */
+	public void addSafeCharacter(char c) {
+		safeCharacters.set(c);
+	}
+	/**
+	 * Removes a safe character.<br>
+	 * Safe characters are not encoded.
+	 *
+	 * @param c the character
+	 */
+	public void removeSafeCharacter(char c) {
+		safeCharacters.clear(c);
+	}
+	/**
+	 * Sets whether a space is encoded as '+'.
+	 *
+	 * @param encodeSpaceAsPlus {@code true} to encode a space as '+'
+	 */
+	public void setEncodeSpaceAsPlus(boolean encodeSpaceAsPlus) {
+		this.encodeSpaceAsPlus = encodeSpaceAsPlus;
+	}
+	/**
+	 * Percent-encodes the unsafe characters of a URL string.
+	 *
+	 * @param path the string to encode; {@code null} or empty is returned unchanged
+	 * @param charset the charset used to turn unsafe characters into bytes; when {@code null}
+	 *        the input is returned unchanged because no byte representation can be chosen
+	 *
+	 * @return the encoded string
+	 */
+	public String encode(String path, Charset charset) {
+		if (null == path || null == charset || path.isEmpty()) {
+			return path;
+		}
+		final int maxBytesPerChar = 10;
+		final int length = path.length();
+		final StringBuilder rewrittenPath = new StringBuilder(length);
+		final ByteArrayOutputStream buf = new ByteArrayOutputStream(maxBytesPerChar);
+		final OutputStreamWriter writer = new OutputStreamWriter(buf, charset);
+		for (int i = 0; i < length; i++) {
+			final char c = path.charAt(i);
+			if (safeCharacters.get(c)) {
+				rewrittenPath.append(c);
+				continue;
+			}
+			if (encodeSpaceAsPlus && c == ' ') {
+				// a space gets its own representation
+				rewrittenPath.append('+');
+				continue;
+			}
+			// convert to external encoding before hex conversion
+			try {
+				writer.write(c);
+				// write both halves of a surrogate pair before flushing, so the supplementary
+				// character is encoded as one unit instead of depending on the writer
+				// carrying a dangling high surrogate over to the next write
+				if (Character.isHighSurrogate(c) && i + 1 < length
+						&& Character.isLowSurrogate(path.charAt(i + 1))) {
+					i++;
+					writer.write(path.charAt(i));
+				}
+				writer.flush();
+			} catch (IOException ignored) {
+				// the target is an in-memory buffer, so this is not expected to happen;
+				// as before, the character is skipped rather than failing the whole encode
+				buf.reset();
+				continue;
+			}
+			for (byte toEncode : buf.toByteArray()) {
+				// Converting each byte in the buffer
+				rewrittenPath.append('%');
+				HexUtil.appendHex(rewrittenPath, toEncode, false);
+			}
+			buf.reset();
+		}
+		return rewrittenPath.toString();
+	}
+}
--- a/jodconverter-web/src/main/java/cn/keking/hutool/URLUtil.java
+++ b/jodconverter-web/src/main/java/cn/keking/hutool/URLUtil.java
+package cn.keking.hutool;
+import java.nio.charset.StandardCharsets;
+/**
+ * Utilities for uniform resource locators (URLs).
+ *
+ * @author xiaoleilu
+ *
+ */
+public class URLUtil {
+	/**
+	 * Normalizes a URL string without encoding it:
+	 *
+	 * <pre>
+	 * 1. runs of / are collapsed into a single /
+	 * </pre>
+	 *
+	 * Equivalent to {@code normalize(url, false)}.
+	 *
+	 * @param url the URL string
+	 * @return the normalized URL string
+	 */
+	public static String normalize(String url) {
+		return normalize(url, false);
+	}
+	/**
+	 * Normalizes a URL string:
+	 *
+	 * <pre>
+	 * 1. runs of / are collapsed into a single /
+	 * </pre>
+	 *
+	 * The URL is split into three parts: the prefix up to and including "://" ("http://" is
+	 * assumed when the separator is missing or is at position 0), the body, and the parameters
+	 * starting at the first "?" of the body. Only the body is cleaned up and, optionally,
+	 * encoded; the prefix and the parameters are passed through untouched.<br>
+	 * Note: "%" is not among the safe characters of {@link URLEncoder#DEFAULT}, so a body that
+	 * already contains percent escapes has their "%" signs encoded again.
+	 *
+	 * @param url the URL string; a blank value ({@code null} included) is returned unchanged
+	 * @param isEncodeBody whether non-ASCII and special characters in the body are percent-encoded
+	 *        as UTF-8 (the prefix such as http: and the / separators are not encoded)
+	 * @return the normalized URL string
+	 * @since 4.4.1
+	 */
+	public static String normalize(String url, boolean isEncodeBody) {
+		if (StrUtil.isBlank(url)) {
+			return url;
+		}
+		// start from the "no scheme" assumption and override it when "://" is present
+		String pre = "http://";
+		String body = url;
+		final int schemeSepIndex = url.indexOf("://");
+		if (schemeSepIndex > 0) {
+			final int bodyStart = schemeSepIndex + 3;
+			pre = StrUtil.subPre(url, bodyStart);
+			body = StrUtil.subSuf(url, bodyStart);
+		}
+		// split the parameters off so that they are neither cleaned up nor encoded
+		String params = StrUtil.EMPTY;
+		final int querySepIndex = StrUtil.indexOf(body, '?');
+		if (querySepIndex > 0) {
+			params = StrUtil.subSuf(body, querySepIndex);
+			body = StrUtil.subPre(body, querySepIndex);
+		}
+		body = body
+				// strip leading \ or /
+				.replaceAll("^[\\\\/]+", StrUtil.EMPTY)
+				// turn every \ into /
+				.replace("\\", "/")
+				// collapse runs of / into one
+				.replaceAll("//+", "/");
+		if (isEncodeBody) {
+			body = URLEncoder.DEFAULT.encode(body, StandardCharsets.UTF_8);
+		}
+		return pre + body + params;
+	}
+}
\ No newline at end of file
--- a/jodconverter-web/src/main/java/cn/keking/utils/DownloadUtils.java
+++ b/jodconverter-web/src/main/java/cn/keking/utils/DownloadUtils.java
 package cn.keking.utils;
 import cn.keking.config.ConfigConstants;
+import cn.keking.hutool.URLUtil;
 import cn.keking.model.FileAttribute;
 import cn.keking.model.ReturnResponse;
 import org.slf4j.Logger;
@@ -29,11 +30,6 @@ public class DownloadUtils {
    private static final String URL_PARAM_FTP_CONTROL_ENCODING = "ftp.control.encoding";
    /**
-     * 一开始测试的时候发现有些文件没有下载下来，而有些可以；当时也是郁闷了好一阵，但是最终还是不得解
-     * 再次测试的时候，通过前台对比url发现，原来参数中有+号特殊字符存在，但是到后之后却变成了空格，突然恍然大悟
-     * 应该是转义出了问题，url转义中会把+号当成空格来计算，所以才会出现这种情况，遂想要通过整体替换空格为加号，因为url
-     * 中的参数部分是不会出现空格的，但是文件名中就不好确定了，所以只对url参数部分做替换
-     * 注: 针对URLEncoder.encode(s,charset)会将空格转成+的情况需要做下面的替换工作
     * @param fileAttribute
     * @return
     */
@@ -43,12 +39,7 @@ public class DownloadUtils {
        ReturnResponse<String> response = new ReturnResponse<>(0, "下载成功!!!", "");
        URL url = null;
        try {
-            urlAddress = replacePlusMark(urlAddress);
+            urlAddress = URLUtil.normalize(urlAddress, true);
-            urlAddress = encodeUrlParam(urlAddress);
-            // 因为tomcat不能处理'+'号，所以讲'+'号替换成'%20%'
-            // 也不能处理空格
-            urlAddress = urlAddress.replaceAll("\\+", "%20");
-            urlAddress = urlAddress.replaceAll(" ", "%20");
            url = new URL(urlAddress);
        } catch (MalformedURLException e) {
            e.printStackTrace();
@@ -105,84 +96,6 @@ public class DownloadUtils {
        }
    }
-    /**
-     * 注:可能是原来因为前端通过encodeURI来编码的，因为通过encodeURI编码+会被转成+号(亦即没有转)，
-     * 而通过encodeURIComponent则会转成%2B，这样URLDecoder是可以正确处理的，所以也就没有必要在这里替换了
-     * 转换url参数部分的空格为加号(因为在url编解码的过程中出现+转为空格的情况)
-     * @param urlAddress
-     * @return
-     */
-    private String replacePlusMark(String urlAddress) {
-        if (urlAddress.contains("?")) {
-            String nonParamStr = urlAddress.substring(0,urlAddress.indexOf("?") + 1);
-            String paramStr = urlAddress.substring(nonParamStr.length());
-            return nonParamStr + paramStr.replace(" ", "+");
-        }
-        return urlAddress;
-    }
-    /**
-     * 对最有一个路径进行转码
-     * @param urlAddress
-     *          http://192.168.2.111:8013/demo/Handle中文.zip
-     *          http://192.168.2.111:8013/download?id=1&filename=中文.zip
-     * @return
-     */
-    private String encodeUrlParam(String urlAddress){
-        StringBuffer sb = new StringBuffer();
-        for (int i = 0; i < urlAddress.length(); i++) {
-            char c = urlAddress.charAt(i);
-            if (c >= 0 && c <= 255) {
-                sb.append(c);
-            } else {
-                byte[] b;
-                try {
-                    //指定需要的编码类型
-                    b = String.valueOf(c).getBytes("utf-8");
-                } catch (Exception ex) {
-                    System.out.println(ex);
-                    b = new byte[0];
-                }
-                for (int j = 0; j < b.length; j++) {
-                    int k = b[j];
-                    if (k < 0) {
-                        k += 256;
-                    }
-                    sb.append("%" + Integer.toHexString(k).toUpperCase());
-                }
-            }
-        }
-        return sb.toString();
-    }
-    /**
-     * 因为jodConvert2.1不支持ms2013版本的office转换，这里偷懒，尝试看改一下文件类型，让jodConvert2.1去
-     * 处理ms2013，看结果如何，如果问题很大的话只能采取其他方式，如果没有问题，暂时使用该版本来转换
-     * @param type
-     * @return
-     */
-    private String dealWithMS2013(String type) {
-        String newType = null;
-        switch (type){
-            case "docx":
-                newType = "doc";
-            break;
-            case "xlsx":
-                newType = "doc";
-            break;
-            case "pptx":
-                newType = "ppt";
-            break;
-            default:
-                newType = type;
-            break;
-        }
-        return newType;
-    }
  /**
   * 转换文本文件编码为utf8
   * 探测源文件编码,探测到编码切不为utf8则进行转码