java过滤html代码

找的一份java过滤html代码,截取html指定长度,保存一下

import java.util.List;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Attribute;
import org.jsoup.nodes.Attributes;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.nodes.TextNode;
import org.jsoup.parser.Tag;
import org.jsoup.safety.Whitelist;

public class HtmlFilter {

    // 只有纯文本可以通过
    public static String getText(String html) {
        if (html == null)
            return null;
        return Jsoup.clean(html, Whitelist.none());
    }

    // 以下标签可以通过
    // b, em, i, strong, u. 纯文本
    public static String getSimpleHtml(String html) {
        if (html == null)
            return null;
        return Jsoup.clean(html, Whitelist.simpleText());
    }

    // 以下标签可以通过
    //a, b, blockquote, br, cite, code, dd, dl, dt, em, i, li, ol, p, pre, q, small, strike, strong, sub, sup, u, ul
    public static String getBasicHtml(String html) {
        if (html == null)
            return null;
        return Jsoup.clean(html, Whitelist.basic());
    }

    //在basic基础上  增加图片通过
    public static String getBasicHtmlandimage(String html) {
        if (html == null)
            return null;
        return Jsoup.clean(html, Whitelist.basicWithImages());
    }

    // 以下标签可以通过
    //a, b, blockquote, br, caption, cite, code, col, colgroup, dd, dl, dt, em, h1, h2, h3, h4, h5, h6, i, img, li, ol, p, pre, q, small, strike, strong, sub, sup, table, tbody, td, tfoot, th, thead, tr, u, ul
    public static String getFullHtml(String html) {
        if (html == null)
            return null;
        return Jsoup.clean(html, Whitelist.relaxed());
    }

    //只允许指定的html标签
    public static String clearTags(String html, String... tags) {
        Whitelist wl = new Whitelist();
        return Jsoup.clean(html, wl.addTags(tags));
    }

    // 对关键字加上颜色
    public static String markKeywods(String keywords, String target) {
        if (StrKit.notBlank(keywords)) {
            String[] arr = keywords.split(" ");
            for (String s : arr) {
                if (StrKit.notBlank(s)) {
                    String temp = "<span class=\"highlight\">" + s + "</span>";
                    if (temp != null)
                        target = target.replaceAll(s, temp);
                }
            }
        }
        return target;
    }

    // 获取文章中的img url
    public static String getImgSrc(String html) {
        if (html == null)
            return null;
        Document doc = Jsoup.parseBodyFragment(html);
        Element image = doc.select("img").first();
        return image == null ? null : image.attr("src");
    }


    /**
     * 截取字符串长字,保留HTML格式
     *
     * @param content
     * @param len     字符长度
     */
    public static String truncateHTML(String content, int len) {
        Document dirtyDocument = Jsoup.parse(content);
        Element source = dirtyDocument.body();
        Document clean = Document.createShell(dirtyDocument.baseUri());
        Element dest = clean.body();
        truncateHTML(source, dest, len);
        return dest.html();
    }

    /**
     * 使用Jsoup预览
     *
     * @param source 需要过滤的
     * @param dest   过滤后的对象
     * @param len    截取字符长度
     *               <p>
     *               Document dirtyDocument = Jsoup.parse(sb.toString());<br />
     *               Element source = dirtyDocument.body();<br />
     *               Document clean = Document.createShell(dirtyDocument.baseUri());<br />
     *               Element dest = clean.body();<br />
     *               int len = 6;<br />
     *               truncateHTML(source,dest,len);<br />
     *               System.out.println(dest.html());<br />
     */
    private static void truncateHTML(Element source, Element dest, int len) {
        List<Node> sourceChildren = source.childNodes();
        for (Node sourceChild : sourceChildren) {
            if (sourceChild instanceof Element) {
                Element sourceEl = (Element) sourceChild;
                Element destChild = createSafeElement(sourceEl);
                int txt = dest.text().length();
                if (txt >= len) {
                    break;
                } else {
                    len = len - txt;
                }
                dest.appendChild(destChild);
                truncateHTML(sourceEl, destChild, len);
            } else if (sourceChild instanceof TextNode) {
                int destLeng = dest.text().length();
                if (destLeng >= len) {
                    break;
                }
                TextNode sourceText = (TextNode) sourceChild;
                int txtLeng = sourceText.getWholeText().length();
                if ((destLeng + txtLeng) > len) {
                    int tmp = len - destLeng;
                    String txt = sourceText.getWholeText().substring(0, tmp);
                    TextNode destText = new TextNode(txt, sourceChild.baseUri());
                    dest.appendChild(destText);
                    break;
                } else {
                    TextNode destText = new TextNode(sourceText.getWholeText(), sourceChild.baseUri());
                    dest.appendChild(destText);
                }
            }
        }
    }

    /**
     * 按原Element重建一个新的Element
     *
     * @param sourceEl
     * @return
     */
    private static Element createSafeElement(Element sourceEl) {
        String sourceTag = sourceEl.tagName();
        Attributes destAttrs = new Attributes();
        Element dest = new Element(Tag.valueOf(sourceTag), sourceEl.baseUri(), destAttrs);
        Attributes sourceAttrs = sourceEl.attributes();
        for (Attribute sourceAttr : sourceAttrs) {
            destAttrs.put(sourceAttr);
        }
        return dest;
    }

    /**
     * 截取字符串长字,不保留HTML格式
     *
     * @param content
     * @param len     字符长度
     */
    public static String truncate(String content, int len) {
        String result = Jsoup.clean(content, Whitelist.none());
        if (result.length() > len) {
            result = result.substring(0, len) + "...";
        }
        return result;
    }
}
©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

推荐阅读更多精彩内容