// Running counter; its current value becomes the id of the next robot_rule.
var rule_id = 0;

/**
 * Constructor for a single rule read from robots.txt.
 *
 * @param action   rule kind; the parser in this file passes "allow" or "disallow"
 * @param robot    user-agent the rule belongs to
 * @param url      path pattern of the rule
 * @param line_no  1-based number of the source line
 * @param line     text of the source line
 *
 * Each instance takes the current value of the global rule_id counter as
 * its id and advances the counter by one.
 */
function robot_rule (action, robot, url, line_no, line) {
    // Post-increment: the instance gets the old value, the counter moves on.
    this.id         = rule_id++;
    this.action     = action;
    this.robot      = robot;
    this.url        = url;
    this.has_errors = false;
    this.line_no    = line_no;
    this.line       = line;
}

// Shared state, rebuilt by parse_robots_txt() and read by test_urls():
var rules  = [],  // robot_rule objects in the order they were parsed
    robots = [];  // distinct user-agent names that own at least one rule

/**
 * Parses the robots.txt text held in the #robots_txt field.
 *
 * Side effects:
 *   - resets and refills the globals `rules` (robot_rule objects, in file
 *     order) and `robots` (distinct user-agents that own a rule), and
 *     restarts `rule_id` at 0;
 *   - writes the collected problems into #robots_txt_errors and shows that
 *     element, or hides it when nothing went wrong.
 *
 * Recognised directives are User-agent, Disallow, Allow and Sitemap; any
 * other line is ignored. An empty "Disallow:" is stored as an "allow" rule
 * with an empty url.
 *
 * NOTE: only the most recent User-agent line is tracked, so in a group that
 * lists several User-agent lines the rules are attached to the last one only.
 */
function parse_robots_txt() {

    // Linear-time form of the accepted user-agent grammar: word runs joined
    // by single "-", "/" or "." separators, with an optional trailing
    // separator. (A nested-quantifier spelling of the same grammar
    // backtracks exponentially on long invalid names.)
    var valid_agent = /^\w+(?:[-\/.]\w+)*[-\/.]?$/;

    // Scheme, host ending in an alphabetic TLD of two or more letters,
    // then a non-empty path.
    var valid_sitemap = /^https?:\/\/[a-zA-Z0-9\-.]+\.[a-zA-Z]{2,}\/\S+$/i;

    // Escapes text for insertion into the HTML error list; the robots.txt
    // content may have been fetched from an arbitrary site.
    function escape_html(text) {
        return String(text)
            .replace(/&/g, "&amp;")
            .replace(/</g, "&lt;")
            .replace(/>/g, "&gt;")
            .replace(/"/g, "&quot;")
            .replace(/'/g, "&#39;");
    }

    // Returns what follows the first ":" with surrounding whitespace removed.
    function directive_value(line) {
        return line.replace(/^[^:]*:/, "").replace(/^\s+|\s+$/g, "");
    }

    // Returns the complaint about an Allow/Disallow path, or "" if it is fine.
    function path_problem(path) {
        if (!/^\//.test(path)) return "the url should start with /";
        if (/\$\s*\w+/.test(path)) return "you shouldn't enter any text after $ /";
        return "";
    }

    var errors = "";

    // Appends one entry to the error list.
    function add_error(line_no, line, message) {
        errors += "Line " + line_no + ": " + escape_html(line) + " # " + message + " <br />";
    }

    // Accept LF, CRLF and CR line endings so no stray "\r" stays on a line.
    var lines = $("#robots_txt").val().split(/\r\n|\r|\n/);
    var ua    = "";

    rules   = [];
    rule_id = 0;
    robots  = [];

    for (var i = 0; i < lines.length; i++) {
        var line_no = i + 1;
        var line    = lines[i].replace(/#.*$/, ""); // strip out the comments

        if (/^\s*$/.test(line)) continue; // nothing left on this line

        if (/^\s*User-Agent:/i.test(line)) {
            ua = directive_value(line);
            if (ua !== "*" && !valid_agent.test(ua)) {
                // Forget the agent so the rules that follow are reported too.
                ua = "";
                add_error(line_no, line, "error parsing user agent string");
            }
            continue;
        }

        var is_disallow = /^\s*Disallow:/i.test(line);

        if (is_disallow || /^\s*Allow:/i.test(line)) {
            if (ua === "") {
                add_error(line_no, line, "no robot for this rule!");
                continue;
            }

            var url = directive_value(line);

            // "Disallow:" with no path means nothing is disallowed.
            if (is_disallow && url === "") {
                rules.push(new robot_rule("allow", ua, url, line_no, line));
                continue;
            }

            var problem = path_problem(url);
            if (problem) {
                add_error(line_no, line, problem);
                continue;
            }

            rules.push(new robot_rule(is_disallow ? "disallow" : "allow", ua, url, line_no, line));
            continue;
        }

        if (/^\s*Sitemap:/i.test(line)) {
            if (!valid_sitemap.test(directive_value(line))) {
                add_error(line_no, line, "you should enter full url to your sitemap /");
            }
        }
    }

    // Collect each robot once, in order of first appearance.
    for (var r = 0; r < rules.length; r++) {
        var known = false;
        for (var k = 0; k < robots.length && !known; k++) {
            known = (robots[k] == rules[r].robot);
        }
        if (!known) robots.push(rules[r].robot);
    }

    var box = $("#robots_txt_errors");
    if (errors) {
        box.html(errors);
        if (box.is(":hidden")) box.show();
    }
    else if (!box.is(":hidden")) {
        box.hide();
    }
}

/**
 * Checks every URL listed in the #urls field against the parsed rules and
 * writes an HTML report into #results.
 *
 * Reads the globals `rules` and `robots` filled by parse_robots_txt(); does
 * nothing when there are no rules. For each robot the LAST rule (in file
 * order) whose pattern matches the URL decides whether the robot is listed
 * as allowed or disallowed; robots with no matching rule are not listed.
 * When #robots_txt_url is non-empty, an "Analyzing ..." header is prepended.
 *
 * The "*" robot is displayed as "All other robots" when more than one robot
 * is known and as "All robots" otherwise. Only the robot name is
 * substituted, so asterisks inside URLs and quoted rule lines are kept.
 */
function test_urls() {

    if (rules.length == 0) return; // return if we don't have any rules

    // Escapes text for insertion into the HTML report; URLs and rule lines
    // come from user input or from a fetched robots.txt.
    function escape_html(text) {
        return String(text)
            .replace(/&/g, "&amp;")
            .replace(/</g, "&lt;")
            .replace(/>/g, "&gt;")
            .replace(/"/g, "&quot;")
            .replace(/'/g, "&#39;");
    }

    var star_label = (robots.length > 1) ? 'All other robots' : 'All robots';

    // Display name of a robot.
    function robot_label(name) {
        return (name == "*") ? star_label : escape_html(name);
    }

    // Renders "robot (line N: text), ..." for the given rules, ending in <br />.
    function describe(matched) {
        var parts = [];
        for (var k = 0; k < matched.length; k++) {
            parts.push(robot_label(matched[k].robot) + " (line " + matched[k].line_no + ": " + escape_html(matched[k].line) + ")");
        }
        return parts.join(", ") + "<br />";
    }

    // Compile each rule's pattern once instead of once per URL and robot.
    var patterns = [];
    for (var p = 0; p < rules.length; p++) {
        patterns.push(get_regexp(rules[p].url));
    }

    var lines  = $("#urls").val().split(/\r\n|\r|\n/);
    var report = "";

    for (var i = 0; i < lines.length; i++) {
        var line = lines[i];
        if (/^\s*$/.test(line)) continue; // skip empty lines
        if (/^robots\.txt$/i.test(line)) continue;

        var url = get_url(line);
        report += '<span class="url">' + escape_html(url) + "</span><br />";

        var robots_allowed    = [];
        var robots_disallowed = [];

        for (var r = 0; r < robots.length; r++) {
            // The last matching rule of this robot wins.
            var deciding = null;
            for (var j = 0; j < rules.length; j++) {
                if (rules[j].robot == robots[r] && url.match(patterns[j])) {
                    deciding = rules[j];
                }
            }

            if (!deciding) continue;

            if (deciding.action == "allow") {
                robots_allowed.push(deciding);
            }
            else {
                robots_disallowed.push(deciding);
            }
        }

        if (robots_allowed.length > 0) {
            report += "Robots allowed: " + describe(robots_allowed);
        }
        else if (robots_disallowed.length == 0) {
            report += "Robots allowed: All robots<br />";
        }
        else {
            // If the catch-all robot is blocked, nobody is allowed;
            // otherwise everyone not listed as disallowed is.
            var star_blocked = false;
            for (var d = 0; d < robots_disallowed.length; d++) {
                if (robots_disallowed[d].robot == "*") star_blocked = true;
            }
            report += "Robots allowed: " + (star_blocked ? "none" : star_label) + "<br />";
        }

        if (robots_disallowed.length > 0) {
            report += "Robots disallowed: " + describe(robots_disallowed);
        }
        else {
            report += "Robots disallowed: none <br />";
        }

        report += "<br />";
    }

    var robots_txt_url = $.trim($("#robots_txt_url").val());
    if (robots_txt_url != "") {
        report = "<p>Analyzing <strong>" + escape_html(robots_txt_url) + "</strong></p>" + report;
    }

    $("#results").html(report);
}

/**
 * Turns a robots.txt path pattern into a RegExp anchored at the start of
 * the path.
 *
 * "*" matches any run of characters and a trailing "$" anchors the match at
 * the end of the path. Every other character - including ".", "?", "+" and
 * a "$" that is not the last character - is matched literally.
 *
 * @param url  path pattern taken from an Allow/Disallow rule
 * @return     RegExp to test request paths with
 */
function get_regexp(url) {
    var anchored = /\$$/.test(url);
    var body     = anchored ? url.slice(0, -1) : url;

    // Escape regex metacharacters first ("*" is deliberately left out of the
    // class), then expand the robots.txt wildcard.
    var pattern = body
        .replace(/[.+?^${}()|[\]\\]/g, '\\$&')
        .replace(/\*/g, '.*');

    return new RegExp("^" + pattern + (anchored ? "$" : ""));
}

/**
 * Reduces one line of user input to the path that is matched against the
 * robots.txt rules.
 *
 *   "http://host/a/b", "https://host/a/b"  ->  "/a/b"
 *   "http://host"                          ->  "/"
 *   "host.tld/a/b"                         ->  "/a/b"
 *   "/a/b"                                 ->  "/a/b"
 *   "a/b"                                  ->  "/a/b"
 *
 * A scheme is recognised only at the start of the input (case-insensitive),
 * and a leading segment is treated as a host name only when that segment
 * itself contains a dot, so relative paths such as "docs/http.html" or
 * "images/v1.2/x.png" keep all of their segments.
 *
 * @param s  raw input line
 * @return   path beginning with "/"
 */
function get_url(s) {

    var str = s.replace(/^\s+|\s+$/g, '');

    // Absolute URL: drop the scheme and everything up to the first "/" after it.
    if (/^https?:\/\//i.test(str)) {
        var rest  = str.replace(/^https?:\/\//i, '');
        var slash = rest.indexOf('/');
        return (slash === -1) ? '/' : rest.slice(slash);
    }

    // Scheme-less "host.tld/path": the first segment contains a dot.
    if (/^[^\/]*\.[^\/]*\//.test(str)) {
        return str.slice(str.indexOf('/'));
    }

    if (str.charAt(0) === '/') {
        return str;
    }

    return '/' + str;
}

// Wires up the wizard once the DOM is ready. The handlers below switch
// between three panels: #get_robots_txt (paste or fetch robots.txt),
// #enter_urls (list the URLs to test) and #analyze (results).
$(document).ready(function() {

    // Shows `message` (HTML) in the fetch-error box.
    function show_fetch_error(message) {
        var box = $("#error-getting-robots");
        box.html(message);
        if (box.is(":hidden")) box.show("fast");
    }

    // Hides the fetch-error box if it is currently visible.
    function hide_fetch_error() {
        var box = $("#error-getting-robots");
        if (!box.is(":hidden")) box.hide("fast");
    }

    // Parses the text in #robots_txt and moves on to the URL panel.
    function parse_and_continue() {
        parse_robots_txt();
        $("#get_robots_txt").hide();
        $("#enter_urls").show();
    }

    $("#goto_urls").click(function() {

        // Pasted text takes precedence over the URL field.
        if ($.trim($("#robots_txt").val()) != "") {
            hide_fetch_error();
            parse_and_continue();
            return;
        }

        var robots_txt_url = $.trim($("#robots_txt_url").val());

        if (robots_txt_url == "") {
            show_fetch_error("You should <strong>enter</strong> the url or <strong>paste</strong> robots.txt into the box below. ");
            return;
        }

        // Anything not ending in ".txt" is taken as a site/directory URL;
        // append "robots.txt" and reflect the final URL back into the field.
        if (!robots_txt_url.match(/\.txt$/i)) {
            if (!robots_txt_url.match(/\/$/)) robots_txt_url += '/';
            robots_txt_url += 'robots.txt';
            $("#robots_txt_url").val(robots_txt_url);
        }

        $.ajax({
            type: "POST",
            url: 'r.php',
            // Passed as an object so jQuery URL-encodes the value; a
            // hand-built "url=..." string loses "&", "+" and "#".
            data: { url: robots_txt_url },
            cache: false,
            success: function (data, textStatus) {

                // r.php answers with this text in the body when the fetch failed.
                if (data.match(/Can't load robots\.txt/)) {
                    show_fetch_error("Can't load robots.txt");
                    return;
                }

                hide_fetch_error();
                $("#robots_txt").val(data);
                parse_and_continue();
            },
            error: function (request, textStatus, errorThrown) {
                show_fetch_error("Can't load robots.txt ");
            }
        });
    });

    $("#back_robots_txt").click(function() {
        $("#enter_urls").hide();
        $("#get_robots_txt").show();
    });

    $("#goto_results").click(function() {
        test_urls();
        $("#enter_urls").hide();
        $("#analyze").show();
    });

    $("#back_urls").click(function() {
        $("#analyze").hide();
        $("#enter_urls").show();
    });

});
