Parse new HTML which has <form> inside <form> on cm
author: Magnus Hagander <[email protected]>
Fri, 23 Aug 2019 07:51:16 +0000 (09:51 +0200)
committer: Magnus Hagander <[email protected]>
Fri, 23 Aug 2019 07:51:50 +0000 (09:51 +0200)
code/pgeusite/cmutuel/management/commands/cmscrape.py

index f5be767f8104bafe7e26584adb31970a57cc408c..609abf8adc900da73c6b4f8cb85100c8a0e12716 100755 (executable)
@@ -31,10 +31,12 @@ class FormHtmlParser(HTMLParser):
         self.target_url = None
 
     def handle_starttag(self, tag, attrs):
+        if self.target_url:
+            return
         if tag == 'form':
             for k, v in attrs:
                 if k == 'action':
-                    if v.find('telechargement.cgi?'):
+                    if v.find('telechargement.cgi?') >= 0:
                         self.in_form = True
                         self.target_url = v
                         return