姓名解析新算法

功能完善
POI升级版本
master
lenovo 2 years ago
parent 9d87a0850d
commit 9d78e9ea80

@ -47,14 +47,14 @@
<dependency> <dependency>
<groupId>org.apache.poi</groupId> <groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId> <artifactId>poi</artifactId>
<version>3.9</version> <version>4.1.2</version>
</dependency> </dependency>
<!--xlsx(07)07版本的--> <!--xlsx(07)07版本的-->
<dependency> <dependency>
<groupId>org.apache.poi</groupId> <groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId> <artifactId>poi-ooxml</artifactId>
<version>3.9</version> <version>4.1.2</version>
</dependency> </dependency>
<dependency> <dependency>

@ -1,24 +1,31 @@
package com.gmh.controller; package com.gmh.controller;
import cn.hutool.core.util.StrUtil; import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper;
import com.gmh.entity.GmhUser; import com.gmh.entity.GmhUser;
import com.gmh.entity.R; import com.gmh.entity.R;
import com.gmh.entity.SysBaijiaxing;
import com.gmh.entity.vo.ReadNameVo;
import com.gmh.service.SysBaijiaxingService;
import com.gmh.utils.POIExcelUtil; import com.gmh.utils.POIExcelUtil;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet; import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook; import org.apache.poi.ss.usermodel.Workbook;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.RequestMapping; import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController; import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile; import org.springframework.web.multipart.MultipartFile;
import java.io.IOException; import java.io.IOException;
import java.util.*; import java.util.*;
import java.util.stream.Collectors;
@RestController @RestController
@RequestMapping("/excel") @RequestMapping("/excel")
public class ExcelController { public class ExcelController {
public final ThreadLocal<Workbook> workbookThreadLocal = new ThreadLocal<>();
private static final String KEY_XINGMING = "姓名";
@RequestMapping("/t01") @RequestMapping("/t01")
public R test01() { public R test01() {
return R.ok("ok"); return R.ok("ok");
@ -27,58 +34,139 @@ public class ExcelController {
@RequestMapping("/readData") @RequestMapping("/readData")
public R readSourceData(MultipartFile file) throws IOException { public R readSourceData(MultipartFile file) throws IOException {
Workbook workbook = POIExcelUtil.readExcelFromInputStream(file.getInputStream(), file.getOriginalFilename()); Workbook workbook = POIExcelUtil.readExcelFromInputStream(file.getInputStream(), file.getOriginalFilename());
Integer nameCellIndex = null; List<ReadNameVo> resultList = new ArrayList<>();
Map<String, List<GmhUser>> sheetNameList = new LinkedHashMap<>();
for (int i = 0; i < workbook.getNumberOfSheets(); i++) { for (int i = 0; i < workbook.getNumberOfSheets(); i++) {
List<GmhUser> nameList = new ArrayList<>(); ReadNameVo readNameVo = new ReadNameVo();
Sheet sheet = workbook.getSheetAt(i); Sheet sheet = workbook.getSheetAt(i);
for (int r = 0; r <= sheet.getLastRowNum(); r++) { List<Map<String, String>> excelMaps = POIExcelUtil.toListMap(sheet);
Row row = sheet.getRow(r); if (excelMaps.isEmpty()) {
if (row == null) {
continue; continue;
} }
int lastCellNum = row.getLastCellNum(); Map<String, String> firstElement = excelMaps.get(0);
for (int c = 0; c < lastCellNum; c++) { String xmKey = KEY_XINGMING;
Cell cell = row.getCell(c); for (String key : firstElement.keySet()) {
if (cell == null) { if (KEY_XINGMING.equals(firstElement.get(key))) {
continue; xmKey = key;
} }
String stringCellValue = POIExcelUtil.getStringCellValExcludeBlank(cell);
if (r == 0) {
// 第一行有姓名列
if ("姓名".equals(stringCellValue)) {
nameCellIndex = c;
break;
} else {
// 第一列没有姓名的情况
} }
if (firstElement.containsKey(xmKey)) {
if (!KEY_XINGMING.equals(xmKey)) {
excelMaps.remove(firstElement);
} }
readNameVo = getReadNameDataForNameKey(excelMaps, xmKey);
} else {
Map<String, Integer> map = new HashMap<>();
for (Map<String, String> excelMap : excelMaps) {
for (String key : excelMap.keySet()) {
String value = excelMap.get(key);
if (isChinaName(value)) {
Integer count = map.getOrDefault(key, 0);
map.put(key, ++count);
} }
if (nameCellIndex != null) {
Cell cell = row.getCell(nameCellIndex);
String stringCellValue = POIExcelUtil.getStringCellValExcludeBlank(cell);
if (!"姓名".equals(stringCellValue) && StrUtil.isNotBlank(stringCellValue)) {
GmhUser user = new GmhUser();
user.setName(stringCellValue);
nameList.add(user);
} }
} else {
System.out.println("未找到姓名列");
} }
List<Map.Entry<String,Integer>> list = new ArrayList<>(map.entrySet());
// list.sort(Comparator.comparingInt(Map.Entry::getValue)); //升序
list.sort((o1, o2) -> (o2.getValue() - o1.getValue()));
String key = list.get(0).getKey();
readNameVo = getReadNameDataForNameKey(excelMaps, key);
} }
if (!nameList.isEmpty()) { readNameVo.setSheetName(sheet.getSheetName());
sheetNameList.put(sheet.getSheetName(), nameList); resultList.add(readNameVo);
} }
return R.ok().setData(resultList);
} }
return R.ok().setData(sheetNameList);
public ReadNameVo getReadNameDataForNameKey(List<Map<String, String>> excelMaps, final String key) {
ReadNameVo readNameVo = new ReadNameVo();
// 去重前
List<GmhUser> oldNameList = excelMaps.stream().map(map -> new GmhUser(map.get(key))).collect(Collectors.toList());
// 去重后
LinkedHashSet<GmhUser> newNameList = new LinkedHashSet<>(oldNameList);
readNameVo.setNameList(newNameList);
// 重复检测
int repeatCount = oldNameList.size() - newNameList.size();
if (repeatCount > 0) {
readNameVo.setHasRepeat(true);
readNameVo.setRepeatCount(repeatCount);
} else {
readNameVo.setHasRepeat(false);
readNameVo.setRepeatCount(0);
}
return readNameVo;
} }
@RequestMapping("/template") @RequestMapping("/template")
public R templateUpload(MultipartFile file) throws IOException { public R templateUpload(MultipartFile file) throws IOException {
Workbook workbook = POIExcelUtil.readExcelFromInputStream(file.getInputStream(), file.getOriginalFilename()); Workbook workbook = POIExcelUtil.readExcelFromInputStream(file.getInputStream(), file.getOriginalFilename());
List<Map<String, String>> maps = POIExcelUtil.toListMap(workbook.getSheetAt(0));
workbookThreadLocal.set(workbook);
return R.ok().setData(maps);
}
@Autowired
private SysBaijiaxingService baijiaxingService;
return R.ok(); private boolean isChinaName(String val) {
QueryWrapper<SysBaijiaxing> queryWrapper = new QueryWrapper<>();
queryWrapper.apply("{0} LIKE CONCAT(xingshi,'%')", val);
List<SysBaijiaxing> list = baijiaxingService.list(queryWrapper);
return !list.isEmpty();
} }
/**
 * Retired placeholder that performs no work.
 *
 * <p>The body used to contain only a commented-out copy of the earlier cell-by-cell
 * name extraction loop; it never executed any statement. That dead code has been
 * removed and remains available in version control history.
 *
 * @deprecated no-op kept only so existing references keep compiling; do not call.
 */
@Deprecated
public void oleCode() {
    // Intentionally empty.
}
} }

@ -15,4 +15,10 @@ public class GmhUser {
private String name; private String name;
/**
 * Creates a user without setting the name.
 */
public GmhUser() {
}
/**
 * Creates a user with the given name.
 *
 * @param name value stored in the {@code name} field
 */
public GmhUser(String name) {
this.name = name;
}
} }

@ -7,7 +7,7 @@ import lombok.Data;
import static com.baomidou.mybatisplus.annotation.IdType.AUTO; import static com.baomidou.mybatisplus.annotation.IdType.AUTO;
@Data @Data
@TableName("sys_firstname") @TableName("sys_baijiaxing")
public class SysBaijiaxing { public class SysBaijiaxing {
@TableId(type = AUTO) @TableId(type = AUTO)

@ -0,0 +1,20 @@
package com.gmh.entity.vo;
import com.gmh.entity.GmhUser;
import lombok.Data;
import java.util.LinkedHashSet;
/**
 * View object describing the names read from a single sheet.
 * Accessors are generated by Lombok's {@code @Data}.
 */
@Data
public class ReadNameVo {
// Name of the sheet this result belongs to
private String sheetName;
// Names found on the sheet; a LinkedHashSet, so each entry is kept once in insertion order
private LinkedHashSet<GmhUser> nameList;
// Whether any repeated entries were detected
private Boolean hasRepeat;
// Number of repeated entries
private Integer repeatCount;
}

@ -1,14 +1,16 @@
package com.gmh.utils; package com.gmh.utils;
import cn.hutool.core.util.StrUtil; import cn.hutool.core.util.StrUtil;
import cn.hutool.poi.excel.ExcelFileUtil;
import com.sun.org.apache.bcel.internal.generic.IF_ACMPEQ;
import org.apache.poi.hssf.usermodel.HSSFWorkbook; import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.ss.usermodel.Cell; import org.apache.poi.ss.usermodel.*;
import org.apache.poi.ss.usermodel.Sheet; import org.apache.poi.ss.util.CellReference;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFWorkbook; import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import java.io.*; import java.io.*;
import java.nio.file.Files; import java.nio.file.Files;
import java.util.*;
/** /**
* POI Excel * POI Excel
@ -80,10 +82,12 @@ public class POIExcelUtil {
return readExcelFromInputStream(Files.newInputStream(file.toPath()), file.getName()); return readExcelFromInputStream(Files.newInputStream(file.toPath()), file.getName());
} }
public static Workbook readExcelFromInputStream(InputStream inputStream, String fileName) throws IOException { public static Workbook readExcelFromInputStream(InputStream inputStream) throws IOException {
if (StrUtil.isBlank(fileName)) { return readExcelFromInputStream(inputStream, null);
throw new RuntimeException("文件格式错误");
} }
public static Workbook readExcelFromInputStream(InputStream inputStream, String fileName) throws IOException {
if (StrUtil.isNotBlank(fileName)) {
String[] split = fileName.split("\\."); String[] split = fileName.split("\\.");
if (split.length == 0 || split.length == 1) { if (split.length == 0 || split.length == 1) {
throw new RuntimeException("文件格式错误"); throw new RuntimeException("文件格式错误");
@ -97,9 +101,111 @@ public class POIExcelUtil {
throw new RuntimeException("文件格式错误"); throw new RuntimeException("文件格式错误");
} }
} }
if (ExcelFileUtil.isXls(inputStream)) {
return new HSSFWorkbook(inputStream);
} else if (ExcelFileUtil.isXlsx(inputStream)) {
return new XSSFWorkbook(inputStream);
} else {
throw new RuntimeException("文件格式错误");
}
}
public static String getStringCellValExcludeBlank(Cell cell) { public static String getStringCellValExcludeBlank(Cell cell) {
cell.setCellType(Cell.CELL_TYPE_STRING); return getStringCellVal(cell).replaceAll("\\s*", "");
return cell.getStringCellValue().replaceAll("\\s*", ""); }
/**
 * Returns the text of a cell as produced by a freshly created POI {@code DataFormatter}.
 *
 * @param cell cell to render
 * @return the value returned by {@code DataFormatter.formatCellValue(cell)}
 */
public static String getStringCellVal(Cell cell) {
    return new DataFormatter().formatCellValue(cell);
}
/**
 * Converts one sheet of a workbook into a list of row maps without a title row.
 *
 * <p>Delegates to {@link #toListMap(Workbook, int, int)} with a negative title row
 * number, which makes the column letters ("A", "B", ...) the map keys.
 *
 * @param workbook   workbook containing the sheet
 * @param sheetIndex zero-based index passed to {@code Workbook.getSheetAt}
 * @return one map per row, mapping column key to cell text
 */
public static List<Map<String, String>> toListMap(Workbook workbook, int sheetIndex) {
    // Any negative value means "no title row".
    final int noTitleRow = -1;
    return toListMap(workbook, sheetIndex, noTitleRow);
}
/**
 * Converts one sheet of a workbook into a list of row maps.
 *
 * <p>Looks up the sheet by index and delegates to {@link #toListMap(Sheet, int)}.
 *
 * @param workbook    workbook containing the sheet
 * @param sheetIndex  zero-based index passed to {@code Workbook.getSheetAt}
 * @param titleRowNum row number of the title row; a value below 0 means there is no
 *                    title row and column letters are used as keys
 * @return one map per row, mapping column key to cell text
 */
public static List<Map<String, String>> toListMap(Workbook workbook, int sheetIndex, int titleRowNum) {
    Sheet sheet = workbook.getSheetAt(sheetIndex);
    return toListMap(sheet, titleRowNum);
}
/**
 * Converts a sheet into a list of row maps without a title row.
 *
 * <p>Delegates to {@link #toListMap(Sheet, int)} with a negative title row number,
 * which makes the column letters ("A", "B", ...) the map keys.
 *
 * @param sheet sheet to read
 * @return one map per row, mapping column key to cell text
 */
public static List<Map<String, String>> toListMap(Sheet sheet) {
    // Any negative value means "no title row".
    final int noTitleRow = -1;
    return toListMap(sheet, noTitleRow);
}
/**
* Excel List<Map<String, String>>
*
* @param sheet sheet
* @param titleRowNum <0
* @return List<Map<String, String>>
*/
public static List<Map<String, String>> toListMap(Sheet sheet, int titleRowNum) {
List<Map<String, String>> list = new ArrayList<>();
List<String> mapKeys = new ArrayList<>();
if (titleRowNum >= 0) {
Row titleRow = sheet.getRow(titleRowNum);
if (titleRow == null) {
return list;
}
for (int i = 0; i < titleRow.getLastCellNum(); i++) {
Cell cell = titleRow.getCell(i);
if (cell == null) {
break;
}
String key = getStringCellValExcludeBlank(cell);
mapKeys.add(key);
}
} else {
// 根据第一行列的数量去找下面所有的列
Row titleRow = sheet.getRow(0);
if (titleRow == null) {
return list;
}
for (int i = 0; i < titleRow.getLastCellNum(); i++) {
Cell cell = titleRow.getCell(i);
if (cell == null) {
break;
}
String colString = CellReference.convertNumToColString(cell.getColumnIndex());
mapKeys.add(colString);
}
}
for (int i = 0; i <= sheet.getLastRowNum(); i++) {
// 指定了标题列的情况下获取数据时跳过标题列
if (titleRowNum >= 0 && i == titleRowNum) {
continue;
}
Row row = sheet.getRow(i);
if (row == null) {
continue;
}
Map<String, String> map = new LinkedHashMap<>();
for (int c = 0; c < mapKeys.size(); c++) {
map.put(mapKeys.get(c), getStringCellValExcludeBlank(row.getCell(c)));
}
list.add(map);
}
return list;
} }
} }
Loading…
Cancel
Save