Mercurial > hg > TextShaper
comparison textshaper/url2txt.py @ 39:986f8a20c234
STUB: textshaper/url2txt.py
| author | Jeff Hammel <k0scist@gmail.com> |
|---|---|
| date | Thu, 03 Jul 2014 13:46:30 -0700 |
| parents | 56fa70e2e239 |
| children |
comparison
equal
deleted
inserted
replaced
| 38:56fa70e2e239 | 39:986f8a20c234 |
|---|---|
| 6 xclip -o | sed 's/_//' | sed 's/.html//' | 6 xclip -o | sed 's/_//' | sed 's/.html//' |
| 7 """ | 7 """ |
| 8 | 8 |
| 9 import argparse | 9 import argparse |
| 10 import sys | 10 import sys |
| 11 import urlparse | |
| 11 | 12 |
| 12 def url2txt(url): | 13 def url2txt(url, strip_extension=True, replacements=(('_', ' '),)): |
| 13 """gets the text equivalent of a URL""" | 14 """gets the text equivalent of a URL""" |
| 14 url = url.rstrip('/') | 15 |
| 15 if '/' in url: | 16 # parse the url |
| 16 url = url.rsplit('/')[-1] | 17 parsed = urlparse.urlparse(url) |
| 17 if '.' in url: | 18 |
| 18 url = url.split('.', 1)[0] | 19 # process the path, if available |
| 19 url = url.replace('_', ' ') | 20 path = parsed.path.rstrip('/') |
| 20 return url | 21 if path: |
| 22 text = path.split('/')[-1] | |
| 23 if strip_extension: | |
| 24 # strip the extension, if desired | |
| 25 text = text.split('.', 1)[0] | |
| 26 else: | |
| 27 # otherwise go with the hostname | |
| 28 text = parsed.hostname | |
| 29 | |
| 30 # replace desired items | |
| 31 for item, replacement in replacements: | |
| 32 text = text.replace(item, replacement) | |
| 33 | |
| 34 return text | |
| 21 | 35 |
| 22 | 36 |
| 23 def main(args=sys.argv[1:]): | 37 def main(args=sys.argv[1:]): |
| 24 """CLI""" | 38 """CLI""" |
| 25 | 39 |
| 26 # parse command line | 40 # parse command line |
| 27 parser = argparse.ArgumentParser(description=__doc__) | 41 parser = argparse.ArgumentParser(description=__doc__) |
| 28 parser.add_option('urls', metavar='url', nargs='+', | 42 parser.add_argument('urls', metavar='url', nargs='+', |
| 29 help="URLs to convert") | 43 help="URLs to convert") |
| 30 options = parser.parse_args(args) | 44 options = parser.parse_args(args) |
| 31 | 45 |
| 32 # convert urls | 46 # convert urls |
| 33 for url in options.urls: | 47 for url in options.urls: |
| 34 print (url2txt(url)) | 48 print (url2txt(url)) |
