姓名解析新算法

功能完善
POI升级版本
master
lenovo 2 years ago
parent 9d87a0850d
commit 9d78e9ea80

@ -47,14 +47,14 @@
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>3.9</version>
<version>4.1.2</version>
</dependency>
<!--xlsx(07)07版本的-->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>3.9</version>
<version>4.1.2</version>
</dependency>
<dependency>

@ -1,24 +1,31 @@
package com.gmh.controller;
import cn.hutool.core.util.StrUtil;
import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper;
import com.gmh.entity.GmhUser;
import com.gmh.entity.R;
import com.gmh.entity.SysBaijiaxing;
import com.gmh.entity.vo.ReadNameVo;
import com.gmh.service.SysBaijiaxingService;
import com.gmh.utils.POIExcelUtil;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;
import java.io.IOException;
import java.util.*;
import java.util.stream.Collectors;
@RestController
@RequestMapping("/excel")
public class ExcelController {
public final ThreadLocal<Workbook> workbookThreadLocal = new ThreadLocal<>();
private static final String KEY_XINGMING = "姓名";
@RequestMapping("/t01")
public R test01() {
return R.ok("ok");
@ -27,58 +34,139 @@ public class ExcelController {
@RequestMapping("/readData")
public R readSourceData(MultipartFile file) throws IOException {
Workbook workbook = POIExcelUtil.readExcelFromInputStream(file.getInputStream(), file.getOriginalFilename());
Integer nameCellIndex = null;
Map<String, List<GmhUser>> sheetNameList = new LinkedHashMap<>();
List<ReadNameVo> resultList = new ArrayList<>();
for (int i = 0; i < workbook.getNumberOfSheets(); i++) {
List<GmhUser> nameList = new ArrayList<>();
ReadNameVo readNameVo = new ReadNameVo();
Sheet sheet = workbook.getSheetAt(i);
for (int r = 0; r <= sheet.getLastRowNum(); r++) {
Row row = sheet.getRow(r);
if (row == null) {
List<Map<String, String>> excelMaps = POIExcelUtil.toListMap(sheet);
if (excelMaps.isEmpty()) {
continue;
}
int lastCellNum = row.getLastCellNum();
for (int c = 0; c < lastCellNum; c++) {
Cell cell = row.getCell(c);
if (cell == null) {
continue;
Map<String, String> firstElement = excelMaps.get(0);
String xmKey = KEY_XINGMING;
for (String key : firstElement.keySet()) {
if (KEY_XINGMING.equals(firstElement.get(key))) {
xmKey = key;
}
String stringCellValue = POIExcelUtil.getStringCellValExcludeBlank(cell);
if (r == 0) {
// 第一行有姓名列
if ("姓名".equals(stringCellValue)) {
nameCellIndex = c;
break;
} else {
// 第一列没有姓名的情况
}
if (firstElement.containsKey(xmKey)) {
if (!KEY_XINGMING.equals(xmKey)) {
excelMaps.remove(firstElement);
}
readNameVo = getReadNameDataForNameKey(excelMaps, xmKey);
} else {
Map<String, Integer> map = new HashMap<>();
for (Map<String, String> excelMap : excelMaps) {
for (String key : excelMap.keySet()) {
String value = excelMap.get(key);
if (isChinaName(value)) {
Integer count = map.getOrDefault(key, 0);
map.put(key, ++count);
}
if (nameCellIndex != null) {
Cell cell = row.getCell(nameCellIndex);
String stringCellValue = POIExcelUtil.getStringCellValExcludeBlank(cell);
if (!"姓名".equals(stringCellValue) && StrUtil.isNotBlank(stringCellValue)) {
GmhUser user = new GmhUser();
user.setName(stringCellValue);
nameList.add(user);
}
} else {
System.out.println("未找到姓名列");
}
List<Map.Entry<String,Integer>> list = new ArrayList<>(map.entrySet());
// list.sort(Comparator.comparingInt(Map.Entry::getValue)); //升序
list.sort((o1, o2) -> (o2.getValue() - o1.getValue()));
String key = list.get(0).getKey();
readNameVo = getReadNameDataForNameKey(excelMaps, key);
}
if (!nameList.isEmpty()) {
sheetNameList.put(sheet.getSheetName(), nameList);
readNameVo.setSheetName(sheet.getSheetName());
resultList.add(readNameVo);
}
return R.ok().setData(resultList);
}
return R.ok().setData(sheetNameList);
/**
 * Builds the name result for one sheet, taking the values under {@code key} as the names.
 *
 * <p>Each row contributes one {@link GmhUser} created from {@code row.get(key)}. The users are
 * gathered in a {@link LinkedHashSet}, so repeated entries are dropped while first-occurrence
 * order is kept; the number of dropped entries is reported as the duplicate count.
 *
 * @param excelMaps parsed rows, each mapping a column key to the cell text
 * @param key       column key whose values are used as names
 * @return a {@link ReadNameVo} holding the de-duplicated names, the duplicate flag and the
 *         duplicate count (the sheet name is not set here)
 */
public ReadNameVo getReadNameDataForNameKey(List<Map<String, String>> excelMaps, final String key) {
    // Collect straight into the set; uniqueness is decided by GmhUser's equals/hashCode.
    LinkedHashSet<GmhUser> uniqueUsers = new LinkedHashSet<>();
    for (Map<String, String> row : excelMaps) {
        uniqueUsers.add(new GmhUser(row.get(key)));
    }

    // Rows that did not add a new element to the set are the duplicates.
    int duplicates = excelMaps.size() - uniqueUsers.size();

    ReadNameVo result = new ReadNameVo();
    result.setNameList(uniqueUsers);
    result.setHasRepeat(duplicates > 0);
    result.setRepeatCount(duplicates);
    return result;
}
/**
 * Receives an uploaded Excel template and returns the rows of its first sheet.
 *
 * <p>The workbook is parsed from the upload, the sheet at index 0 is converted to a list of
 * row maps via {@code POIExcelUtil.toListMap(Sheet)}, and the workbook is stored in
 * {@code workbookThreadLocal} before responding.
 *
 * @param file the uploaded Excel file
 * @return a success response whose data is the list of row maps of the first sheet
 * @throws IOException if the upload stream cannot be opened or read
 */
@RequestMapping("/template")
public R templateUpload(MultipartFile file) throws IOException {
    Workbook workbook;
    // Release the upload stream as soon as the workbook has been built from it;
    // previously the stream was never closed.
    try (java.io.InputStream in = file.getInputStream()) {
        workbook = POIExcelUtil.readExcelFromInputStream(in, file.getOriginalFilename());
    }
    List<Map<String, String>> maps = POIExcelUtil.toListMap(workbook.getSheetAt(0));
    // NOTE(review): nothing visible here ever clears this ThreadLocal; on pooled request
    // threads the workbook would stay referenced after the request — confirm intended use.
    workbookThreadLocal.set(workbook);
    return R.ok().setData(maps);
}
@Autowired
private SysBaijiaxingService baijiaxingService;
return R.ok();
/**
 * Reports whether the given text begins with a surname stored in the surname table.
 *
 * <p>The check is done in the database: a row matches when
 * {@code val LIKE CONCAT(xingshi, '%')}, i.e. when {@code val} starts with that row's
 * {@code xingshi} value.
 *
 * @param val cell text to test; may be {@code null} or blank
 * @return {@code true} if at least one surname row matches, {@code false} otherwise
 *         (including for {@code null} or blank input)
 */
private boolean isChinaName(String val) {
    // A null or blank value cannot start with a surname, so skip the database round-trip.
    // NOTE(review): assumes the table holds no blank xingshi rows — confirm.
    if (val == null || val.trim().isEmpty()) {
        return false;
    }
    QueryWrapper<SysBaijiaxing> queryWrapper = new QueryWrapper<>();
    // {0} is bound as a parameter by MyBatis-Plus, so val is not concatenated into the SQL.
    queryWrapper.apply("{0} LIKE CONCAT(xingshi,'%')", val);
    // Only existence matters: count the matches instead of loading every matching row.
    return baijiaxingService.count(queryWrapper) > 0;
}
/**
 * Legacy placeholder that performs no work.
 *
 * <p>The body used to contain only a commented-out copy of the earlier row-by-row scan for
 * the name column. That dead code has been removed; it remains available in version
 * control. The method and its name are kept unchanged so existing callers, if any, still
 * compile.
 */
public void oleCode() {
    // Intentionally empty.
}
}

@ -15,4 +15,10 @@ public class GmhUser {
private String name;
/**
 * Creates a user without setting the name.
 */
public GmhUser() {
}
/**
 * Creates a user with the given name.
 *
 * @param name value assigned to the {@code name} field
 */
public GmhUser(String name) {
this.name = name;
}
}

@ -7,7 +7,7 @@ import lombok.Data;
import static com.baomidou.mybatisplus.annotation.IdType.AUTO;
@Data
@TableName("sys_firstname")
@TableName("sys_baijiaxing")
public class SysBaijiaxing {
@TableId(type = AUTO)

@ -0,0 +1,20 @@
package com.gmh.entity.vo;
import com.gmh.entity.GmhUser;
import lombok.Data;
import java.util.LinkedHashSet;
/**
 * View object carrying the names read from a single sheet together with
 * duplicate information.
 */
@Data
public class ReadNameVo {
// Name of the sheet the names belong to
private String sheetName;
// Names held in a LinkedHashSet: unique entries in insertion order
private LinkedHashSet<GmhUser> nameList;
// Whether there are duplicates
private Boolean hasRepeat;
// Number of duplicates
private Integer repeatCount;
}

@ -1,14 +1,16 @@
package com.gmh.utils;
import cn.hutool.core.util.StrUtil;
import cn.hutool.poi.excel.ExcelFileUtil;
import com.sun.org.apache.bcel.internal.generic.IF_ACMPEQ;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.*;
import org.apache.poi.ss.util.CellReference;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import java.io.*;
import java.nio.file.Files;
import java.util.*;
/**
* POI Excel
@ -80,10 +82,12 @@ public class POIExcelUtil {
return readExcelFromInputStream(Files.newInputStream(file.toPath()), file.getName());
}
public static Workbook readExcelFromInputStream(InputStream inputStream, String fileName) throws IOException {
if (StrUtil.isBlank(fileName)) {
throw new RuntimeException("文件格式错误");
public static Workbook readExcelFromInputStream(InputStream inputStream) throws IOException {
return readExcelFromInputStream(inputStream, null);
}
public static Workbook readExcelFromInputStream(InputStream inputStream, String fileName) throws IOException {
if (StrUtil.isNotBlank(fileName)) {
String[] split = fileName.split("\\.");
if (split.length == 0 || split.length == 1) {
throw new RuntimeException("文件格式错误");
@ -97,9 +101,111 @@ public class POIExcelUtil {
throw new RuntimeException("文件格式错误");
}
}
if (ExcelFileUtil.isXls(inputStream)) {
return new HSSFWorkbook(inputStream);
} else if (ExcelFileUtil.isXlsx(inputStream)) {
return new XSSFWorkbook(inputStream);
} else {
throw new RuntimeException("文件格式错误");
}
}
public static String getStringCellValExcludeBlank(Cell cell) {
cell.setCellType(Cell.CELL_TYPE_STRING);
return cell.getStringCellValue().replaceAll("\\s*", "");
return getStringCellVal(cell).replaceAll("\\s*", "");
}
/**
 * Returns the text of a cell as rendered by a fresh POI {@link DataFormatter}.
 *
 * @param cell cell to read
 * @return the formatted cell text
 */
public static String getStringCellVal(Cell cell) {
    return new DataFormatter().formatCellValue(cell);
}
/**
 * Converts one sheet of a workbook into a list of row maps, without a title row.
 *
 * <p>Delegates to {@link #toListMap(Workbook, int, int)} with a negative title row number.
 *
 * @param workbook   workbook to read from
 * @param sheetIndex index of the sheet to convert
 * @return one map per row, as produced by the three-argument overload
 */
public static List<Map<String, String>> toListMap(Workbook workbook, int sheetIndex) {
    // A negative title row number means "no title row".
    final int noTitleRow = -1;
    return toListMap(workbook, sheetIndex, noTitleRow);
}
/**
 * Converts one sheet of a workbook into a list of row maps.
 *
 * <p>Looks up the sheet by index and delegates to {@link #toListMap(Sheet, int)}.
 *
 * @param workbook    workbook to read from
 * @param sheetIndex  index of the sheet to convert
 * @param titleRowNum index of the title row; a negative value means there is no title row
 * @return one map per row, as produced by the sheet-based overload
 */
public static List<Map<String, String>> toListMap(Workbook workbook, int sheetIndex, int titleRowNum) {
    Sheet targetSheet = workbook.getSheetAt(sheetIndex);
    return toListMap(targetSheet, titleRowNum);
}
/**
 * Converts a sheet into a list of row maps, without a title row.
 *
 * <p>Delegates to {@link #toListMap(Sheet, int)} with a negative title row number.
 *
 * @param sheet sheet to convert
 * @return one map per row, as produced by the two-argument overload
 */
public static List<Map<String, String>> toListMap(Sheet sheet) {
    // A negative title row number means "no title row".
    final int noTitleRow = -1;
    return toListMap(sheet, noTitleRow);
}
/**
 * Converts a sheet into a list of rows, each row an ordered map from column key to cell text.
 *
 * <p>When {@code titleRowNum >= 0}, the keys are the whitespace-stripped cell texts of that
 * row, and that row is left out of the result. Otherwise the keys are the column letters of
 * the cells in row 0, and every existing row (row 0 included) is returned as data. Key
 * collection stops at the first missing cell of the key row. Rows that do not exist in the
 * sheet are skipped.
 *
 * @param sheet       sheet to convert
 * @param titleRowNum index of the title row; a negative value means there is no title row
 * @return one map per existing data row; an empty list when the key row does not exist
 */
public static List<Map<String, String>> toListMap(Sheet sheet, int titleRowNum) {
    final boolean hasTitleRow = titleRowNum >= 0;
    List<Map<String, String>> rows = new ArrayList<>();

    // Without a title row, the first row decides how many columns are read from every row.
    Row keyRow = sheet.getRow(hasTitleRow ? titleRowNum : 0);
    if (keyRow == null) {
        return rows;
    }

    List<String> keys = new ArrayList<>();
    int keyCellCount = keyRow.getLastCellNum();
    for (int col = 0; col < keyCellCount; col++) {
        Cell keyCell = keyRow.getCell(col);
        if (keyCell == null) {
            break;
        }
        keys.add(hasTitleRow
                ? getStringCellValExcludeBlank(keyCell)
                : CellReference.convertNumToColString(keyCell.getColumnIndex()));
    }

    int lastRowIndex = sheet.getLastRowNum();
    for (int rowIndex = 0; rowIndex <= lastRowIndex; rowIndex++) {
        // When a title row was specified, it is a header and not part of the data.
        if (hasTitleRow && rowIndex == titleRowNum) {
            continue;
        }
        Row dataRow = sheet.getRow(rowIndex);
        if (dataRow == null) {
            continue;
        }
        Map<String, String> record = new LinkedHashMap<>();
        for (int col = 0; col < keys.size(); col++) {
            record.put(keys.get(col), getStringCellValExcludeBlank(dataRow.getCell(col)));
        }
        rows.add(record);
    }
    return rows;
}
}
Loading…
Cancel
Save