#!/usr/bin/python
"""
rss2latex

Author: Nathan Charles
Version: 0.1

This program renders a pdf of rss feeds

This program has no warranty to the full extent of the law

Running the module as a script writes ``rss2latex.tex``; the generated
source declares XeLaTeX as its TeX program.
"""
import io
import sys
import time  # not referenced by the code below; kept from the original import list
from xml.dom import minidom

try:  # Python 2
    import urllib2
except ImportError:  # Python 3: same Request/build_opener API under a new name
    import urllib.request as urllib2


# Ordered (target, replacement) pairs applied one after another by
# convert_to_tex().  Order matters: later pairs see the output of earlier ones.
#
# NOTE(review): this file reached review with its line structure collapsed.
# The three newline targets below appeared as raw line breaks inside quoted
# strings and the typographic characters appeared literally.  Given the
# function's stated purpose ("Strip out HTML codes"), they were presumably
# HTML tags/entities (something like <br />, <p>, </p>, &#8217; ...) before
# extraction -- confirm against the original file.  They are reproduced here
# exactly as seen; pairs marked "unreachable" can never match as written
# because an earlier pair has already consumed their target.
_TEX_REPLACEMENTS = (
    (u"\u2019", u"'"),          # right single quotation mark
    (u"\u2013", u"-"),          # en dash
    (u"\u2014", u"--"),         # em dash
    (u"\u201c", u"``"),         # left double quotation mark
    (u"\u201d", u"''"),         # right double quotation mark
    (u"\\\u2026", u"\\ldots"),  # backslash followed by a horizontal ellipsis
    (u"\n", u"\\\\"),           # line break -> LaTeX line break
    (u"\n\n", u""),             # unreachable: every "\n" is already gone
    (u"\n\n", u"\\\\\n"),       # unreachable: every "\n" is already gone
    (u"&", u"\\amper"),         # \amper is defined in texheader
    (u"%", u"\\%"),
    (u"$", u"\\$"),
    (u"_", u"\\_"),
    (u"\u2019", u"?"),          # unreachable: U+2019 was replaced by the first pair
    (u"\xa0", u"?"),            # no-break space
)


def convert_to_tex(input):
    """ Strip out HTML codes and replace with LaTeX

    Applies every pair in ``_TEX_REPLACEMENTS`` in order with ``str.replace``.

    input -- text to convert (parameter name kept for backward compatibility
             even though it shadows the builtin)

    Returns the converted text; an empty string is returned unchanged.
    """
    out = input
    for target, replacement in _TEX_REPLACEMENTS:
        out = out.replace(target, replacement)
    return out


def _first_text(item, tag):
    """Return the data of the first child of the first <tag> under item, or ''.

    An absent or empty element yields an empty string instead of raising
    IndexError / AttributeError.
    """
    nodes = item.getElementsByTagName(tag)
    if not nodes or nodes[0].firstChild is None:
        return u""
    return nodes[0].firstChild.data


def renderfeed(address):
    """Fetch the feed at ``address`` and return it rendered as LaTeX source.

    The output starts with an unnumbered section whose heading links to the
    feed address.  For every <item> element it then emits the converted
    title as a subsection, the link, the pubDate text (verbatim, via the
    \\timestamp macro from texheader) and the converted ``content:encoded``
    text.  Progress messages are written to stderr.

    address -- URL of the feed

    Raises whatever the URL opener or ``minidom.parseString`` raise on
    fetch or parse failure.

    NOTE(review): ``address`` and each item's link are inserted into
    ``\\href`` without escaping.
    """
    parts = [u"\\section*{\\href{%s}{%s}}\\\\\n" % (address, address)]

    response = urllib2.build_opener().open(urllib2.Request(address))
    try:
        feed_xml = minidom.parseString(response.read())
    finally:
        # Close the connection even if reading or parsing fails.
        response.close()

    for item in feed_xml.getElementsByTagName("item"):
        title = _first_text(item, "title")
        link = _first_text(item, "link")
        timestamp = _first_text(item, "pubDate")
        entry = _first_text(item, "content:encoded")

        parts.append(u"\n\\subsection*{%s}" % convert_to_tex(title))
        parts.append(u"\n\\noindent \\href{%s}{%s}" % (link, link))
        parts.append(u"\n\\timestamp{%s}\\\\" % timestamp)

        sys.stderr.write("attempting converstion\n")
        clean = convert_to_tex(entry)
        # "failed" only means that no replacement applied; in that case
        # clean and entry are equal, so appending clean covers both branches.
        if clean == entry:
            sys.stderr.write("conversion failed\n")
        else:
            sys.stderr.write("success\n")
        parts.append(clean)

    return u"".join(parts)


# LaTeX preamble up to and including \begin{document}.
texheader = u"""
%!TEX TS-program = xelatex
%!TEX encoding = UTF-8 Unicode
\\documentclass[11pt, a4paper]{article}
\\usepackage{fontspec}
\\usepackage{geometry}
\\geometry{a4paper, textwidth=5.5in, textheight=8.5in, marginparsep=7pt, marginparwidth=.6in}
\\setlength\\parindent{0in}
\\defaultfontfeatures{Mapping=tex-text}
\\setromanfont [Ligatures={Common}, Numbers={OldStyle}]{Hoefler Text}
\\setmonofont[Scale=0.8]{Monaco}
\\setsansfont[Scale=0.9]{Optima Regular}
\\newcommand{\\amper}{{\\fontspec[Scale=.95]{Hoefler Text}\\selectfont\\itshape\\&}}
\\newcommand{\\timestamp}[1]{\\marginpar{\\scriptsize #1}}
\\usepackage{sectsty}
\\usepackage[normalem]{ulem}
\\sectionfont{\\sffamily\\mdseries\\large\\underline}
\\subsectionfont{\\rmfamily\\mdseries\\scshape\\normalsize}
\\subsubsectionfont{\\rmfamily\\bfseries\\upshape\\normalsize}
\\usepackage[dvipdfm, bookmarks, colorlinks, breaklinks, pdftitle={rss2latex}]{hyperref}
\\hypersetup{linkcolor=blue,citecolor=blue,filecolor=black,urlcolor=blue}
\\begin{document}
\\reversemarginpar\\textsf{rss2latex}
"""

# Closes the document opened by texheader.
texfooter = u"""
\\end{document}
"""


def rss2latex(feeds):
    """Return a complete LaTeX document rendering every feed in ``feeds``.

    feeds -- iterable of feed URLs; an empty iterable or None produces a
             document consisting of only the header and footer (previously
             this raised UnboundLocalError).
    """
    parts = [texheader]
    parts.extend(renderfeed(feed) for feed in (feeds or ()))
    parts.append(texfooter)
    return u"".join(parts)


def main():
    """Render the default feed and write the result to ``rss2latex.tex``."""
    filename = 'rss2latex.tex'
    address = ['http://char1es.net/feed/']
    texsource = rss2latex(address)
    # The header declares "%!TEX encoding = UTF-8 Unicode", so write UTF-8
    # explicitly; the context manager closes the file even if write() fails.
    with io.open(filename, 'w', encoding='utf-8') as texfile:
        texfile.write(texsource)


# Run only when executed as a script: the original also called main()
# unconditionally, which ran it twice and made importing the module fetch
# the feed and write the output file.
if __name__ == "__main__":
    main()