/*
 * Copyright (C) 2014 kgto.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 * MA 02110-1301  USA
 */

package Lib;

import java.util.ArrayList;
import java.util.HashMap;
import javax.swing.text.MutableAttributeSet;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTMLEditorKit;

/**
 * Parser callback that collects the text found inside HTML elements
 * matching a search key (tag name plus id and/or class attribute).
 *
 * <p>While no element is being captured, every start tag is compared with
 * the search key. On a match the callback starts accumulating the text
 * pieces reported through {@link #handleText(char[], int)}, joined with a
 * tab character, until the end tag that closes the matched element is seen.
 * Each completed capture is appended to the list returned by
 * {@link #getrtnData()}.
 *
 * @author kgto
 */
public class HtmlParserCallback extends HTMLEditorKit.ParserCallback {

    // Debug output flag.
    static final boolean DEBUG = false;

    // Separator inserted between text pieces captured for one element.
    private static final String TEXT_SEPARATOR = "\t";

    // Current nesting depth for each tag kind.
    HashMap<HTML.Tag,Integer> tagMap = new HashMap<>();

    // Search key information.
    String keytag;
    String keyid;
    String keyclass;

    // State saved when a start tag matched the search key.
    // bufCount == 0 means that no element is being captured.
    int bufCount = 0;
    HTML.Tag bufTag = null;
    MutableAttributeSet bufAttr = null;
    // Work buffer accumulating the text of the matched element.
    StringBuilder bufText;

    // Texts of all elements that matched the search key.
    ArrayList<String> sData;

    /**
     * Creates a callback that searches for the element described by the key.
     *
     * @param skey search key supplying the tag name, id and class to look for
     */
    public HtmlParserCallback(SearchData skey) {

        // Expand the key information. Null id/class keys are treated as
        // "not specified" so that attribute matching never dereferences null.
        keytag   = skey.getHtmltag();
        keyid    = emptyIfNull(skey.getHtmlid());
        keyclass = emptyIfNull(skey.getHtmlclass());

        sData    = new ArrayList<>();
    }

    /**
     * Returns the captured texts, one entry per matched element.
     *
     * @return the internal result list (not a copy)
     */
    public ArrayList<String> getrtnData() {
        return this.sData;
    }

    /**
     * Tracks the nesting depth of the tag and starts a capture when the tag
     * matches the search key and no capture is currently in progress.
     */
    @Override
    public void handleStartTag(HTML.Tag tag, MutableAttributeSet attr, int pos){
        // Keep the nesting depth for each tag kind.
        Integer previous = tagMap.get(tag);
        int count = (previous == null) ? 1 : previous + 1;
        tagMap.put(tag, count);

        //--- DEBUG OUT ---- start ---
        if(DEBUG) {
            StringBuilder line = new StringBuilder();
            line.append(count).append(" : F : ").append(tag.toString());
            appendAttribute(line, " [ID] ", attr, HTML.Attribute.ID);
            appendAttribute(line, " [CLASS] ", attr, HTML.Attribute.CLASS);
            appendAttribute(line, " [VALUE] ", attr, HTML.Attribute.VALUE);
            System.out.println(line);
        }
        //--- DEBUG OUT ---- end ---

        // Only look for a new match while nothing is being captured.
        if(bufCount == 0
                && tag.toString().equals(keytag)
                && serachAttribute(attr)) {
            bufCount = count;
            bufTag   = tag;
            bufAttr  = attr;
            bufText  = new StringBuilder();
        }
    }

    /**
     * Finishes the current capture when the end tag closes the matched
     * element, then decreases the nesting depth of the tag.
     */
    @Override
    public void handleEndTag(HTML.Tag tag, int pos){
        // Fetch the nesting depth of this tag kind.
        Integer depth = tagMap.get(tag);
        int count = (depth == null) ? 0 : depth;

        //--- DEBUG OUT ---- start ---
        if(DEBUG) {
            System.out.println(count + " : E : " + tag.toString());
        }
        //--- DEBUG OUT ---- end ---

        if(bufText != null && tag.equals(bufTag) && count <= bufCount) {

            // Store the accumulated text of the matched element.
            sData.add(bufText.toString());

            // Clear the saved match state.
            bufCount = 0;
            bufTag   = null;
            bufAttr  = null;
            bufText  = null;
        }

        // Decrease the depth, but never below zero: an end tag without a
        // start tag must not produce a negative depth, otherwise the next
        // start tag of this kind would be counted as depth 0, which is the
        // "not capturing" sentinel used by bufCount.
        if(count > 0) {
            tagMap.put(tag, count - 1);
        }
    }

    /**
     * Appends the text to the current capture, if any, after removing
     * control characters, DEL (0x7f) and the non-breaking space (0xa0).
     */
    @Override
    public void handleText(char[] data, int pos){
        //--- DEBUG OUT ---- start ---
        if(DEBUG) {
            System.out.println(new String(data));
        }
        //--- DEBUG OUT ---- end ---

        // Remove control characters.
        // &nbsp; 0xa0
        StringBuilder cleaned = new StringBuilder(data.length);
        for(char c : data) {
            if(c > 0x1f && c != 0x7f && c != 0xa0) {
                cleaned.append(c);
            }
        }

        if(bufCount > 0 && bufText != null) {
            if(bufText.length() > 0) {
                bufText.append(TEXT_SEPARATOR);
            }
            bufText.append(cleaned);
        }

    }

    /**
     * Simple (empty) tags do not take part in the capture; they are only
     * reported in the debug output.
     */
    @Override
    public void handleSimpleTag(HTML.Tag tag, MutableAttributeSet attr, int pos){
        //--- DEBUG OUT ---- start ---
        if(DEBUG) {
            StringBuilder line = new StringBuilder();
            line.append("x : S : ").append(tag.toString());
            appendAttribute(line, " [VALUE] ", attr, HTML.Attribute.VALUE);
            System.out.println(line);
        }
        //--- DEBUG OUT ---- end ---
    }

    /**
     * Compares the ID/CLASS values of an element with the search key.
     *
     * <p>A key that is null or empty is treated as "not specified" and never
     * matches. NOTE(review): when both an id key and a class key are given,
     * a match on either one is sufficient; confirm whether both were meant
     * to be required.
     *
     * @param attr attribute set of the element being examined
     * @return {@code true} when the id key or the class key equals the
     *         corresponding attribute of the element
     */
    public boolean serachAttribute(MutableAttributeSet attr) {
        if(attr == null) {
            return false;
        }
        String currentID    = attributeText(attr, HTML.Attribute.ID);
        String currentClass = attributeText(attr, HTML.Attribute.CLASS);

        if(keyid != null && !keyid.isEmpty() && keyid.equals(currentID)) {
            return true;
        }

        return keyclass != null
                && !keyclass.isEmpty()
                && keyclass.equals(currentClass);
    }

    // Returns the empty string for null, otherwise the value itself.
    private static String emptyIfNull(String value) {
        return (value == null) ? "" : value;
    }

    // Returns the attribute value as text, or null when the attribute is absent.
    private static String attributeText(MutableAttributeSet attr, Object key) {
        Object value = attr.getAttribute(key);
        return (value == null) ? null : value.toString();
    }

    // Appends "label + value" to the debug line when the attribute is present.
    private static void appendAttribute(StringBuilder line, String label,
                                        MutableAttributeSet attr, Object key) {
        String value = attributeText(attr, key);
        if(value != null) {
            line.append(label).append(value);
        }
    }

}
