#!/usr/bin/python
# requires python-argparse, python-bs4

import warnings, argparse

# Command-line interface: an optional output directory and the kind of
# bookmark file to generate.
parser = argparse.ArgumentParser(
    description="Converts Netscape style bookmarks html from stdin to a directory tree.",
    epilog="Remember to always verify output.",
)
parser.add_argument(
    "-o", "--outdir",
    help="Set output directory; must not exist (by default, a new folder in the current directory)",
)
parser.add_argument(
    "-t", "--type",
    choices=["ie", "desktop"],
    default="ie",
    help="Set output type; 'ie' for simple IE-style bookmarks, or 'desktop' for freedesktop link files.",
)
# Parsed eagerly at module level; writeBookmarkFile reads args.type.
args = parser.parse_args()

import os, sys, string
from bs4 import BeautifulSoup

# Pick the output directory: a fresh, uniquely named folder under the current
# directory when -o/--outdir was not given, otherwise the requested path.
if args.outdir is None:
    import tempfile
    outdir = tempfile.mkdtemp(prefix='bookmarks-', dir='.')
else:
    # os.mkdir raises if the path already exists, which enforces the
    # "must not exist" rule stated in the --outdir help text.
    os.mkdir(args.outdir)
    outdir = args.outdir

# NOTE(review): no parser is named here, so BeautifulSoup uses whichever HTML
# parser is installed and the resulting tree may differ between machines.
# Consider pinning one once the intended parser is confirmed.
soup = BeautifulSoup(sys.stdin)

# countA: number of <a> tags in the input.
# countB: incremented by writeBookmarkFile; both are reported at the end.
countA = len(soup.find_all('a'))
countB = 0

def cleanString(string_old):
    '''Return string_old with every character outside [A-Za-z0-9.,] replaced by '-'.

    The result is used to build file and directory names, so anything that
    is not an ASCII letter, a digit, '.' or ',' becomes a dash.  None is
    accepted and treated as an empty string, because the callers pass a
    tag's ``.string``, which can be None.
    '''
    if string_old is None:
        return ''
    # ascii_letters exists on both Python 2 and Python 3 and, unlike
    # string.letters, does not depend on the locale.
    whitelist = string.ascii_letters + string.digits + '.,'
    return ''.join(char if char in whitelist else '-' for char in string_old)

def writeBookmarkFile(path, name, url):
    '''Write one bookmark file for url into the path directory.

    The file name is cleanString(name) plus '.url' when args.type is 'ie'
    or '.desktop' when args.type is 'desktop'.  If a file of that name is
    already present in path, a numeric suffix ('-2', '-3', ...) is inserted
    before the extension so that bookmarks whose names sanitise to the same
    string do not overwrite each other.  The file is written as UTF-8.

    Increments the global countB once per file written; nothing is written
    or counted for any other args.type.
    '''
    global countB
    # Local import: io.open accepts an encoding on both Python 2 and 3.
    import io

    fileName = cleanString(name)
    # The u'' literals keep the content a text string on Python 2 as well,
    # which io.open requires.
    if args.type == 'ie':
        extension = '.url'
        content = u'[InternetShortcut]\nURL=' + url + u'\n'
    elif args.type == 'desktop':
        extension = '.desktop'
        content = (u'[Desktop Entry]\nType=Link\nVersion=1.0\nName=' + fileName
                   + u'\nEncoding=UTF-8\nURL=' + url + u'\n')
    else:
        return

    # Find a name that is not taken yet instead of silently replacing an
    # earlier bookmark that sanitised to the same file name.
    target = os.path.join(path, fileName + extension)
    attempt = 1
    while os.path.exists(target):
        attempt += 1
        target = os.path.join(path, '%s-%d%s' % (fileName, attempt, extension))

    with io.open(target, 'w', encoding='utf-8') as handle:
        handle.write(content)
    countB = countB + 1

def parseFolder(h3Tag, outdir):
    '''Create a directory for the folder headed by h3Tag and fill it, recursing into child folders.

    h3Tag is the heading tag whose text is the folder title; the folder's
    entries are read from the next sibling <dl> of that heading.  Direct <a>
    children of the <dl> are written as bookmark files and direct <h3>
    children are parsed as sub-folders.  outdir is the directory in which
    the new folder is created.
    '''
    path = os.path.join(outdir, cleanString(h3Tag.string))
    os.mkdir(path)
    # Look the <dl> up once rather than once per loop.  A heading with no
    # following sibling <dl> has no entries: keep the directory, add nothing.
    contents = h3Tag.find_next_sibling('dl')
    if contents is None:
        return
    for bookmark in contents.find_all('a', recursive=False):
        writeBookmarkFile(path, bookmark.string, bookmark['href'])
    for folder in contents.find_all('h3', recursive=False):
        parseFolder(folder, path)

# Replace every <dt> under the first <dl> with its own children.  Presumably
# this makes the <a> and <h3> tags direct children of their <dl>, which is
# what parseFolder's recursive=False searches look for.
for dt in soup.dl.find_all('dt'):
    dt.replace_with_children()

# The <h1> inside <body> heads the top-level folder.
parseFolder(soup.body.h1, outdir)

# A single parenthesised argument is valid both as the Python 2 print
# statement and as the Python 3 print function.
print(str(countA) + ' <a> tags found in bookmarks; ' + str(countB) + ' bookmark files written.\nOutput folder:\n' + outdir)
