Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
slm-fileview
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
梁杰芳
slm-fileview
Commits
fdf66b60
Commit
fdf66b60
authored
Dec 24, 2017
by
HWliao
Committed by
klboke
Dec 26, 2017
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
文本文件编码探测,并转码为utf8
parent
a0ce83a3
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
198 additions
and
0 deletions
+198
-0
DownloadUtils.java
...web/src/main/java/com/yudianbank/utils/DownloadUtils.java
+41
-0
FileCharsetDetector.java
...c/main/java/com/yudianbank/utils/FileCharsetDetector.java
+157
-0
No files found.
jodconverter-web/src/main/java/com/yudianbank/utils/DownloadUtils.java
View file @
fdf66b60
...
@@ -5,6 +5,7 @@ import org.springframework.beans.factory.annotation.Value;
...
@@ -5,6 +5,7 @@ import org.springframework.beans.factory.annotation.Value;
import
org.springframework.stereotype.Component
;
import
org.springframework.stereotype.Component
;
import
java.io.*
;
import
java.io.*
;
import
java.net.*
;
import
java.net.*
;
import
java.nio.charset.Charset
;
import
java.util.UUID
;
import
java.util.UUID
;
/**
/**
...
@@ -71,6 +72,10 @@ public class DownloadUtils {
...
@@ -71,6 +72,10 @@ public class DownloadUtils {
response
.
setContent
(
realPath
);
response
.
setContent
(
realPath
);
// 同样针对类txt文件,如果成功msg包含的是转换后的文件名
// 同样针对类txt文件,如果成功msg包含的是转换后的文件名
response
.
setMsg
(
fileName
);
response
.
setMsg
(
fileName
);
// 转换文件编码为utf8
convertTextPlainFileCharsetToUtf8
(
realPath
);
return
response
;
return
response
;
}
catch
(
IOException
e
)
{
}
catch
(
IOException
e
)
{
e
.
printStackTrace
();
e
.
printStackTrace
();
...
@@ -152,4 +157,40 @@ public class DownloadUtils {
...
@@ -152,4 +157,40 @@ public class DownloadUtils {
}
}
return
newType
;
return
newType
;
}
}
/**
 * Converts a plain-text file to UTF-8 in place.
 *
 * <p>The source encoding is guessed with {@link FileCharsetDetector}. The file is left
 * untouched when it does not exist / is not a readable regular file, when no encoding could
 * be detected, when the detected encoding is already UTF-8 or ASCII (ASCII bytes are valid
 * UTF-8 as-is), or when the detected name is not a charset this JVM supports.
 *
 * <p>Otherwise the content is transcoded into a sibling temp file ({@code filePath + ".utf8"})
 * which then replaces the original. If transcoding or the replacement fails, the temp file is
 * removed and the original file is kept.
 *
 * @param filePath path of the file to convert
 * @throws IOException if reading, writing or replacing the file fails
 */
private static void convertTextPlainFileCharsetToUtf8(String filePath) throws IOException {
    File sourceFile = new File(filePath);
    if (!sourceFile.exists() || !sourceFile.isFile() || !sourceFile.canRead()) {
        return;
    }

    String encoding = null;
    try {
        FileCharsetDetector.Observer observer = FileCharsetDetector.guessFileEncoding(sourceFile);
        encoding = observer.getEncoding();
    } catch (IOException e) {
        // Detection is best-effort: on failure the file is simply served unconverted.
        e.printStackTrace();
    }

    if (encoding == null
            || "UTF-8".equalsIgnoreCase(encoding)
            || "ASCII".equalsIgnoreCase(encoding)
            || "US-ASCII".equalsIgnoreCase(encoding)) {
        // Nothing to do: unknown encoding, or the bytes are already valid UTF-8.
        return;
    }

    Charset sourceCharset;
    try {
        sourceCharset = Charset.forName(encoding);
    } catch (IllegalArgumentException e) {
        // The detector produced a name the JVM cannot decode (illegal or unsupported);
        // resolve it before creating any temp file and keep the original untouched.
        e.printStackTrace();
        return;
    }

    File tmpUtf8File = new File(filePath + ".utf8");
    try {
        // try-with-resources closes both streams even when a read/write fails.
        try (Reader reader = new BufferedReader(
                     new InputStreamReader(new FileInputStream(sourceFile), sourceCharset));
             Writer writer = new OutputStreamWriter(new FileOutputStream(tmpUtf8File), "UTF-8")) {
            char[] buf = new char[1024];
            int read;
            while ((read = reader.read(buf)) != -1) {
                writer.write(buf, 0, read);
            }
        }
        // Replace the original in one step instead of delete-then-rename, so a failed
        // replacement never leaves us without the source file. Fully qualified to avoid
        // touching the file's import block.
        java.nio.file.Files.move(
                tmpUtf8File.toPath(),
                sourceFile.toPath(),
                java.nio.file.StandardCopyOption.REPLACE_EXISTING);
    } catch (IOException e) {
        // Best-effort cleanup of the partially written temp file; the original error wins.
        tmpUtf8File.delete();
        throw e;
    }
}
}
}
jodconverter-web/src/main/java/com/yudianbank/utils/FileCharsetDetector.java
0 → 100644
View file @
fdf66b60
package
com
.
yudianbank
.
utils
;
import
java.io.BufferedInputStream
;
import
java.io.File
;
import
java.io.FileInputStream
;
import
java.io.FileNotFoundException
;
import
java.io.IOException
;
import
org.mozilla.intl.chardet.nsDetector
;
import
org.mozilla.intl.chardet.nsICharsetDetectionObserver
;
/**
 * Utility for guessing the character encoding of a text file, backed by jchardet
 * ({@code org.mozilla.intl.chardet}).
 *
 * @author HWliao
 * @date 2017-12-24
 */
public class FileCharsetDetector {

    /**
     * Placeholder that jchardet's {@code getProbableCharsets()} is understood to return when
     * it has no candidate at all (per the library source; verify against the bundled version).
     */
    private static final String NO_MATCH = "nomatch";

    /**
     * Guesses the encoding of the given file using the detector's default language hint.
     *
     * @param file the file to inspect
     * @return an observer holding the detected encoding; its encoding is {@code null} when
     *         nothing could be determined
     * @throws FileNotFoundException if the file cannot be opened
     * @throws IOException           if reading the file fails
     */
    public static Observer guessFileEncoding(File file) throws FileNotFoundException, IOException {
        return guessFileEncoding(file, new nsDetector());
    }

    /**
     * Guesses the encoding of the given file with a language hint.
     *
     * <pre>
     * languageHint values (see nsPSMDetector):
     *   1 : Japanese
     *   2 : Chinese
     *   3 : Simplified Chinese
     *   4 : Traditional Chinese
     *   5 : Korean
     *   6 : Don't know (default)
     * </pre>
     *
     * @param file         the file to inspect
     * @param languageHint language hint code, see the table above
     * @return an observer holding the detected encoding (e.g. UTF-8, GBK, GB2312); when the
     *         detector is not certain, the first probable charset is used; the encoding is
     *         {@code null} when nothing could be determined
     * @throws FileNotFoundException if the file cannot be opened
     * @throws IOException           if reading the file fails
     */
    public static Observer guessFileEncoding(File file, int languageHint)
            throws FileNotFoundException, IOException {
        return guessFileEncoding(file, new nsDetector(languageHint));
    }

    /**
     * Feeds the file content to the given detector and collects the result.
     *
     * @param file the file to inspect
     * @param det  the detector to use
     * @return an observer holding the detection result
     * @throws FileNotFoundException if the file cannot be opened
     * @throws IOException           if reading the file fails
     */
    private static Observer guessFileEncoding(File file, nsDetector det)
            throws FileNotFoundException, IOException {
        Observer observer = new Observer();
        // Notify() on the observer is called when a matching charset is found.
        det.Init(observer);

        // The file counts as ASCII only if EVERY chunk is ASCII. Stopping at the first
        // ASCII chunk would misreport files that merely start with >= 1024 ASCII bytes.
        boolean isAscii = true;
        // try-with-resources closes the stream even if reading or detection throws.
        try (BufferedInputStream in = new BufferedInputStream(new FileInputStream(file))) {
            byte[] buf = new byte[1024];
            int len;
            while ((len = in.read(buf, 0, buf.length)) != -1) {
                if (isAscii) {
                    isAscii = det.isAscii(buf, len);
                }
                // Once non-ASCII data has been seen, feed the detector until it is done.
                if (!isAscii && det.DoIt(buf, len, false)) {
                    break;
                }
            }
        }
        det.DataEnd();

        if (isAscii) {
            observer.encoding = "ASCII";
            observer.found = true;
        }

        if (!observer.isFound()) {
            // No definite answer: fall back to the first probable charset, if there is one.
            String[] prob = det.getProbableCharsets();
            if (prob.length > 0 && !NO_MATCH.equals(prob[0])) {
                observer.encoding = prob[0];
            } else {
                observer.encoding = null;
            }
        }
        return observer;
    }

    /**
     * Charset detection observer; notified by the detector once a charset has been
     * determined.
     *
     * @author liaohongwei
     * @date 2016-06-20 14:27:06
     */
    public static class Observer implements nsICharsetDetectionObserver {

        /** Detected character encoding, or {@code null} if none. */
        private String encoding = null;

        /** Whether a charset was positively identified. */
        private boolean found = false;

        @Override
        public void Notify(String charset) {
            this.encoding = charset;
            this.found = true;
        }

        public String getEncoding() {
            return encoding;
        }

        public boolean isFound() {
            return found;
        }

        @Override
        public String toString() {
            return "Observer [encoding=" + encoding + ", found=" + found + "]";
        }
    }
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment