#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Parse a small inline HTML document with BeautifulSoup and print parts of it.

The script prints the prettified parse tree, then the element reached
through ``soup.html.pre``.
"""
from BeautifulSoup import BeautifulSoup
import re

# The sample markup is kept as fragments and joined into one string for parsing.
# Note that the <p> tags and <body> are deliberately left unclosed.
doc = [
    '<html><head><title>Page title</title></head>',
    '<body><p id="firstpara" align="center">This is paragraph <b>one</b>.',
    '<p id="secondpara" align="blah">This is paragraph <b>two</b>.<pre>Preish</pre>',
    '</html>',
]

soup = BeautifulSoup(''.join(doc))

# Single-argument print(...) produces the same output as the Python 2
# print statement and is also valid Python 3 syntax.
print(soup.prettify())

# This is the <pre> element, not the <title>; the old name was misleading.
pre_tag = soup.html.pre
titleTag = pre_tag  # legacy name kept so existing references still resolve
print(pre_tag)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Extract ``<pre class="code">`` blocks from wx.html into numbered .py files.

Each matching block is echoed to stdout and written to ``zorf1.py``,
``zorf2.py``, ... in the order ``findAll`` returns them.
"""
import io
import re

from BeautifulSoup import BeautifulSoup


def _block_text(tag):
    """Return the text contained in *tag* as a single string.

    ``tag.string`` is None unless the tag's only child is one string, so
    fall back to concatenating every text node instead of passing None on
    to ``write`` (which would raise TypeError).
    """
    if tag.string is not None:
        return tag.string
    return u''.join(tag.findAll(text=True))


if __name__ == "__main__":
    # Context manager closes the input handle even if parsing fails.
    with open('wx.html', 'r') as page:
        soup = BeautifulSoup(page.read())

    # The second positional argument of findAll restricts matches to
    # tags whose CSS class is "code".
    for k, cod in enumerate(soup.findAll('pre', 'code'), 1):
        text = _block_text(cod)
        print(text)
        # io.open with an explicit encoding lets non-ASCII snippets be
        # written without an implicit ASCII encode error.
        with io.open('zorf%d.py' % k, 'w', encoding='utf-8') as fp:
            fp.write(text)

    # Same exit status as the original exit(0), without relying on the
    # interactive helper injected by the site module.
    raise SystemExit(0)
My interest is not in scraping pages for preformatted code; it is in searching pages for references to specific spectral frequencies that I associate with strange matter that does not occur naturally, such as graphene nanotubes, plutonium, or specific combinations of spectra that I would associate with a strange-matter configuration. These scripts are simply tools I constructed to serve as a testable base for gathering and parsing in general.
0 comments:
Post a Comment