Java使用DFA算法实现敏感词过滤

架构

Spring Boot + MySQL + MyBatis-Plus

依赖请自行引入(Spring Boot Web、MyBatis-Plus、MySQL 驱动、Lombok),这里不再逐一列出

sql

初始化一下敏感词表

-- Recreate the sensitive-word table from scratch; any existing `filter` table
-- (and its rows) is dropped first.
DROP TABLE IF EXISTS `filter`;
-- One row per sensitive word: an auto-increment integer key plus the word text.
-- NOTE: `NAME` is declared nullable, so readers of this table must tolerate NULL.
CREATE TABLE `filter`  (
  `ID` int(0) NOT NULL AUTO_INCREMENT COMMENT '主键ID',
  `NAME` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NULL DEFAULT NULL COMMENT '敏感词内容',
  PRIMARY KEY (`ID`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_0900_ai_ci ROW_FORMAT = Dynamic;

-- ----------------------------
-- Records of filter
-- Seed data. Several words are prefixes or substrings of others
-- (rows 1 and 5, rows 2 and 3), which exercises overlapping matches.
-- ----------------------------
INSERT INTO `filter` VALUES (1, '你妈');
INSERT INTO `filter` VALUES (2, '傻逼');
INSERT INTO `filter` VALUES (3, '臭傻逼');
INSERT INTO `filter` VALUES (4, '操你妈的');
INSERT INTO `filter` VALUES (5, '你妈没了');
INSERT INTO `filter` VALUES (6, '逼养的');
INSERT INTO `filter` VALUES (7, '傻鸟');

代码

添加实体类

/**
 * Entity mapped to the {@code filter} table; each instance carries one
 * sensitive word.
 */
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
@TableName("filter")
public class Filter {

    /** Primary key, mapped to the {@code ID} column with {@code IdType.AUTO}. */
    @TableId(type = IdType.AUTO, value = "ID")
    private Long id;

    /** Sensitive word text, mapped to the {@code NAME} column. */
    @TableField("NAME")
    private String name;
}

初始化敏感词容器


/**
 * Loads every sensitive word through {@link FilterService} when the
 * application runner fires and builds a character trie (the DFA used for
 * matching) in a static map.
 *
 * <p>Trie layout: each key is a single-character string mapping to the child
 * node (another {@code Map<String, Object>}); a node that terminates a word
 * additionally holds the key {@code "end"} with value {@code true}.
 *
 * <p>NOTE(review): the trie is a plain {@link HashMap} filled inside
 * {@link #run}; reads that race with that call are not synchronized here.
 */
@Slf4j
@Configuration
public class FilterRun implements ApplicationRunner {

    /** Key stored in a trie node to mark that the path to it spells a complete word. */
    private static final String END_MARKER = "end";

    /** Root of the trie; the same instance is handed out by {@link #getSensitiveWordMap()}. */
    private static final Map<String, Object> sensitiveWordMap = new HashMap<>();

    @Autowired
    private FilterService filterService;

    /**
     * Fetches all sensitive words and inserts each one into the trie.
     *
     * <p>Null rows and null/empty words are skipped: the {@code NAME} column is
     * nullable, and a single bad row must not abort application startup.
     *
     * @param args application arguments (unused)
     * @throws Exception propagated from the word lookup
     */
    @Override
    public void run(ApplicationArguments args) throws Exception {
        List<Filter> filterList = filterService.selectList();
        log.info("获取到的数据:{}",filterList);
        if (filterList == null) {
            return;
        }
        for (Filter filter : filterList) {
            if (filter != null) {
                addWord(filter.getName());
            }
        }
    }

    /**
     * Inserts one word into the trie, creating intermediate nodes on demand and
     * flagging the final node with {@code "end"}.
     *
     * @param word the word to add; ignored when {@code null} or empty
     */
    @SuppressWarnings("unchecked") // child nodes are only ever created here as Map<String, Object>
    private static void addWord(String word) {
        if (word == null || word.isEmpty()) {
            return;
        }
        Map<String, Object> node = sensitiveWordMap;
        for (int i = 0; i < word.length(); i++) {
            String key = String.valueOf(word.charAt(i));
            Object child = node.get(key);
            if (child == null) {
                child = new HashMap<String, Object>();
                node.put(key, child);
            }
            // Descend to the next level of the trie.
            node = (Map<String, Object>) child;
        }
        // The node reached after the last character terminates the word.
        node.put(END_MARKER, true);
    }

    /**
     * Returns the live trie root (not a copy), so holders of the reference see
     * words added later by {@link #run}. Callers should treat it as read-only.
     *
     * @return the shared trie root
     */
    public static Map<String, Object> getSensitiveWordMap() {
        return sensitiveWordMap;
    }
}

 添加工具类


/**
 * Static helpers that detect and mask sensitive words using the character
 * trie exposed by {@link FilterRun#getSensitiveWordMap()}.
 *
 * <p>Matching works on individual {@code char} values and always prefers the
 * longest dictionary word starting at a given position.
 */
public class SensitiveWordUtil {
    /** Key that marks a trie node as the end of a complete word. */
    private static final String END_MARKER = "end";

    /**
     * Matches shorter than this many characters are ignored, so a
     * single-character dictionary entry never triggers on its own.
     */
    private static final int MIN_WORD_LENGTH = 2;

    /** Live reference to the shared trie root; it is filled by {@code FilterRun}. */
    private static final Map<String, Object> sensitiveWordMap = FilterRun.getSensitiveWordMap();

    private SensitiveWordUtil() {
        // Utility class: static methods only.
    }

    /**
     * Tells whether the text contains at least one sensitive word.
     *
     * @param text the text to scan; {@code null} is treated as containing none
     * @return {@code true} if a sensitive word starts at any position of {@code text}
     */
    public static boolean containsSensitiveWord(String text) {
        if (text == null) {
            return false;
        }
        for (int i = 0; i < text.length(); i++) {
            if (checkSensitiveWord(text, i) > 0) {
                return true;
            }
        }
        return false;
    }

    /**
     * Replaces every character of each sensitive word with {@code '*'}.
     *
     * @param text the text to mask; may be {@code null}
     * @return the masked text, with the same length as the input, or
     *         {@code null} when {@code text} is {@code null}
     */
    public static String replaceSensitiveWords(String text) {
        if (text == null) {
            return null;
        }
        int len = text.length();
        StringBuilder result = new StringBuilder(len);
        int index = 0;

        while (index < len) {
            int matched = checkSensitiveWord(text, index);
            if (matched > 0) {
                for (int j = 0; j < matched; j++) {
                    result.append('*');
                }
                // Resume scanning right after the masked word.
                index += matched;
            } else {
                result.append(text.charAt(index));
                index++;
            }
        }
        return result.toString();
    }

    /**
     * Finds the longest sensitive word that starts exactly at {@code beginIndex}.
     *
     * <p>The walk deliberately continues past an end marker: stopping at the
     * first one would report only the shortest word, leaving the tail of a
     * longer entry unmasked and hiding any word whose one-character prefix is
     * itself a dictionary entry.
     *
     * @param txt        the text being scanned
     * @param beginIndex index of the first character to match
     * @return the length of the longest match, or {@code 0} when no word of at
     *         least {@code MIN_WORD_LENGTH} characters starts there
     */
    @SuppressWarnings("unchecked") // trie children are built as Map<String, Object>
    private static int checkSensitiveWord(String txt, int beginIndex) {
        int longestMatch = 0;
        int depth = 0;
        Map<String, Object> node = sensitiveWordMap;

        for (int i = beginIndex; i < txt.length(); i++) {
            Object child = node.get(String.valueOf(txt.charAt(i)));
            if (!(child instanceof Map)) {
                break;
            }
            node = (Map<String, Object>) child;
            depth++;
            if (node.containsKey(END_MARKER)) {
                // Remember this word but keep going in case a longer one follows.
                longestMatch = depth;
            }
        }

        return longestMatch >= MIN_WORD_LENGTH ? longestMatch : 0;
    }

}

调用

/**
 * Demo endpoint under {@code /filter} that runs the sensitive-word check and
 * masking on a request parameter.
 */
@RestController
@RequestMapping("/filter")
@Slf4j
public class FilterController {

    /**
     * Logs whether {@code text} contains a sensitive word, then returns the
     * masked text wrapped in a success result.
     *
     * @param text the text to check, taken from the {@code text} request parameter
     * @return a success result carrying the masked text
     */
    @GetMapping("/test")
    public AjaxResult test(@RequestParam String text) {
        final boolean hit = SensitiveWordUtil.containsSensitiveWord(text);
        log.info("结果:{}",hit);

        final String masked = SensitiveWordUtil.replaceSensitiveWords(text);
        log.info("过滤后的数据信息:{}",masked);

        return AjaxResult.success(masked);
    }

}

效果

可检测文本中是否包含敏感词,并将敏感词替换为 *

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值