ó {G_Tc@sËddlZddlZddlZddlmZddlmZmZeZ yddlm Z Wne k r}e Z nXdej fd„ƒYZejƒZdd„Zd Zd d gZd d dgZedeeeƒdZd d dGgZd gZedeeeƒdZgZd d dgZedeeeƒdZgZdddddddgZedeeedƒed eeed!ƒd"ZdgZddddd#d$gZed%eeeƒd&ZdgZgZed'eeeƒd(Zd)gZd*gZed+eeed,d-ƒd.ZgZd/gZed0eeed,d-ƒgZd/gZed1eeed,d2ƒd3ZgZd/gZed4eeed,d-ƒd/gZgZed5eeed,d2ƒd(Zd)gZd*gZed6eeed,d7ƒd8Zd9gZd:gZed;eeeƒd<Zd=gZd9gZed>eeeƒd?Zd@gZdAgZedBeeeƒdCej fdD„ƒYZdE„ZedFkrÇde_eƒndS(HiÿÿÿÿN(t test_support(turlopent HTTPError(t HTTPSHandlert RobotTestCasecBs#eZd„Zd„Zd„ZRS(cCsgtjj|ƒ|r,d||f|_nd||f|_||_||_||_||_dS(NsRobotTest(%d, good, %s)sRobotTest(%d, bad, %s)(tunittesttTestCaset__init__tstrtparserturltgoodtagent(tselftindexR R R R ((s1/usr/local/lib/python2.7/test/test_robotparser.pyR s   cCs~t|jtƒr$|j\}}n|j}|j}|jr^|j|jj||ƒƒn|j|jj||ƒƒdS(N( t isinstanceR ttupleR R t assertTrueR t can_fetcht assertFalse(R R R ((s1/usr/local/lib/python2.7/test/test_robotparser.pytrunTests   cCs|jS(N(R(R ((s1/usr/local/lib/python2.7/test/test_robotparser.pyt__str__"s(t__name__t __module__RRR(((s1/usr/local/lib/python2.7/test/test_robotparser.pyR s ttest_robotparsercCs’tj|ƒjƒ}tjƒ}|j|ƒx-|D]%}tjt|||d|ƒƒq5Wx-|D]%}tjt|||d|ƒƒqeWdS(Nii(tStringIOt readlinest robotparsertRobotFileParsertparsetteststaddTestR(Rt robots_txtt good_urlstbad_urlsR tlinesR R ((s1/usr/local/lib/python2.7/test/test_robotparser.pyt RobotTest's   # s’ User-agent: * Disallow: /cyberworld/map/ # This is an infinite virtual URL space Disallow: /tmp/ # these will soon disappear Disallow: /foo.html t/s /test.htmls/cyberworld/map/index.htmls/tmp/xxxs /foo.htmlisÁ # robots.txt for http://www.example.com/ User-agent: * Disallow: /cyberworld/map/ # This is an infinite virtual URL space # Cybermapper knows where to go. User-agent: cybermapper Disallow: t cybermapperis% # go away User-agent: * Disallow: / s/tmp/ism User-agent: figtree Disallow: /tmp Disallow: /a%3cd.html Disallow: /a%2fb.html Disallow: /%7ejoe/index.html s/tmps /tmp.htmls /tmp/a.htmls /a%3cd.htmls /a%3Cd.htmls /a%2fb.htmls/~joe/index.htmlitfigtreeisFigTree Robot libwww-perl/5.04sf User-agent: * Disallow: /tmp/ Disallow: /a%3Cd.html Disallow: /a/b.html Disallow: /%7ejoe/index.html s /a/b.htmls/%7Ejoe/index.htmlis User-Agent: * Disallow: /. isG User-agent: Googlebot Allow: /folder1/myfile.html Disallow: /folder1/ s/folder1/myfile.htmls/folder1/anotherfile.htmliR t GooglebotsJ User-agent: Googlebot Disallow: / User-agent: Googlebot-Mobile Allow: / s/something.jpgi i sGooglebot-MobilesJ User-agent: Googlebot-Mobile Allow: / User-agent: Googlebot Disallow: / i i i t googlebots/ User-agent: * Disallow: /some/path?name=value s /some/paths/some/path?name=valueisK User-agent: * Disallow: /some/path User-agent: * Disallow: /another/path s /another/pathis; User-agent: * Allow: /some/path? Disallow: /another/path? s /some/path?s/another/path?itNetworkTestCasecBs,eZd„Zejedƒd„ƒZRS(c Cstjdƒtjdƒäd}|d}yt|ƒWnEtk r‚}|jddhkr”|jd||jfƒq”nX|jd|ƒtjƒ}|j |ƒy|j ƒWn"t k rß|jd |ƒnX|j |j d |ƒtƒWdQXdS( Ntnetworksmueblesmoraleda.comshttp://mueblesmoraleda.coms /robots.txti‘i“s0%r should return a 401 or 403 HTTP error, not %rs5%r should return a 401 or 403 HTTP error, not succeeds%s is unavailablet*(Rtrequiresttransient_internetRRtcodetskipTestRRtset_urltreadtIOErrort assertEqualRtFalse(R R t robots_urlteR ((s1/usr/local/lib/python2.7/test/test_robotparser.pyttestPasswordProtectedSiteüs*     s$need SSL support to download licensecCsYtjdƒtjdƒ7tjdƒ}|jƒ|j|jddƒƒWdQXdS(NR+swww.python.orgs http://www.python.org/robots.txtR,(RR-R.RRR2RR(R R ((s1/usr/local/lib/python2.7/test/test_robotparser.pyt testPythonOrgs   (RRR8Rt skipUnlesst HAVE_HTTPSR9(((s1/usr/local/lib/python2.7/test/test_robotparser.pyR*ús cCstjtƒtjtƒdS(N(Rt run_unittestRR*(((s1/usr/local/lib/python2.7/test/test_robotparser.pyt test_main!s t__main__(R&s/cyberworld/map/index.html(RRRttestRturllib2RRtTrueR;Rt ImportErrorR5RRt TestSuiteRR$tdocR tbadR*R=Rtverbose(((s1/usr/local/lib/python2.7/test/test_robotparser.pytsœ$                                   '