001    package org.util.html.factory;
002    
003    
004    import java.util.*;
005    import java.io.*;
006    import java.net.*;
007    import java.awt.*;
008    import java.awt.event.*;
009    import javax.swing.*;
010    import javax.swing.event.*;
011    
012    import org.util.html.objects.*;
013    import org.util.log.*;
014    
015    import org.util.xml.parse.*;
016    import org.util.xml.parse.policy.*;
017    import org.util.xml.element.*;
018    
019    public class HTMLDocumentFactory {
020    
021        private LogListener log_listener_;
022        private URLConnection connection_;
023        private ParserPolicy html_document_parser_policy_;
024        private ParserPolicy head_tag_parser_policy_;
025        private ParserPolicy body_tag_parser_policy_;
026        private HTMLDocument current_document_;
027    
028        public HTMLDocumentFactory() {
029    
030            URLConnection.setDefaultAllowUserInteraction(true);
031            //      User-Agent: Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.13) Gecko/2009080317 Fedora/3.0.13-1.fc10 Firefox/3.0.13 GTB5
032    
033            html_document_parser_policy_ = new HTMLParserPolicy(){
034                    @Override public boolean throwExceptionIfDocumentHasError() {
035                        return false;
036                    }
037                    @Override public Element allowElement(Element element) {
038                        return element;
039                    }
040                    @Override public ParserPolicy getInnerPolicy(Element element) {
041                        if(!element.isTagElement()) return null;
042                        TagElement tag = (TagElement)element;
043                        if(tag.getKey().toLowerCase().equals("body"))
044                            return body_tag_parser_policy_;
045                        else if(tag.getKey().toLowerCase().equals("head"))
046                            return head_tag_parser_policy_;
047                        return this;
048                    }
049                };
050    
051            head_tag_parser_policy_ = new HTMLParserPolicy(){
052                    @Override public boolean throwExceptionIfDocumentHasError() {
053                        return false;
054                    }
055                    @Override public Element allowElement(Element element) {
056                        super.allowElement(element);
057                        return element;
058                    }
059                };
060    
061            body_tag_parser_policy_ = new HTMLParserPolicy(){
062                    @Override public boolean throwExceptionIfDocumentHasError() {
063                        return false;
064                    }
065                    @Override public Element allowElement(Element element) {
066                        if(element.isTextElement()) {
067                            TextElement text = (TextElement)element;
068                            HTMLText tobj = new HTMLText(current_document_);
069                            tobj.setText(text.getValue());
070                            current_document_.add(tobj);
071                            return element;
072                        } else {
073                            TagElement tag = (TagElement)element;
074                            String key = tag.getKey().toLowerCase();
075                            if(key.equals("img")) {
076                                HTMLImg timg = new HTMLImg(current_document_);
077                                try{
078                                    timg.setURL(new URL(tag.getAttributeValue("src")));
079                                }catch(Exception e){}
080                                current_document_.add(timg);
081                                return element;
082                            }
083                        }
084                        return element;
085                    }
086                };
087        }
088    
089        public void setLogListener(LogListener log_listener) {
090            log_listener_ = log_listener;
091        }
092    
093        public HTMLDocument createDocument(URL url, HTMLDocument doc) throws Exception {
094            connection_ = url.openConnection();
095            return createDocument(url, connection_, connection_.getInputStream(), doc);
096        }
097    
098        public HTMLDocument createDocument(URL url, URLConnection connection, InputStream is, HTMLDocument document) throws Exception {
099            assert is != null;
100    
101            if(document==null)
102                document = new HTMLDocument();
103    
104            current_document_ = document;
105            current_document_.clear();
106            current_document_.setDocumentBase(url);
107            connection_ = connection;
108            ElementParser parser = null;
109            String encoding = null;
110            if(connection_!=null)
111                encoding = connection_.getContentEncoding();
112            if(encoding != null)
113                parser = new ElementParser(is, encoding);
114            else
115                parser = new ElementParser(is);
116            
117            parser.setPolicy(html_document_parser_policy_);
118            
119            Element[] element_list = parser.parse();
120    
121            System.out.println("skipped:");
122            System.out.println("---------------------");
123            //for(Element element : element_list)
124            //    System.out.println(element);
125            
126            return current_document_;
127        }
128    
129    }