#!/usr/bin/env python

import fnmatch
import io
import os
import os.path
import re

cvs_header = r'$Header: /home/pknaggs/cvs/hints/hints/tools/python/twiki_to_tiddlywiki.py,v 1.15 2006/08/20 19:42:53 pknaggs Exp $'

#
# Change the setting for 'file_list' so that it finds your twiki syntax
# files that you want to convert to the tiddlywiki syntax.
# Run this script with no parameters, it should create new files
# in the same directory (with '.new' as the suffix), containing the
# twiki contents converted over to the tiddlywiki syntax.
#
#
# License:
# This script makes use of the example on page 144 of the Python Cookbook, by
# Robin Parmar and Alex Martelli, so it is under the same "modified Berkeley
# license" (see p. xix of that book).
#
#
# listFiles:
# Walk a directory tree, and obtain a list of all the files (and optionally
# all the directories) that match a certain pattern.
#
# The standard directory-tree function os.path.walk is powerful and flexible,
# but it can be confusing to beginners. This recipe dresses it up in a listFiles
# function that lets you choose the root folder, whether to recurse down through
# subfolders, the patterns to match, and whether to include folder names in the
# result list.
#
# The file patterns are case-insensitive but otherwise Linux-style, as supplied
# by the standard fnmatch module, which this recipe uses. To specify multiple
# patterns, join them with a semicolon. Note that this means that semicolons
# themselves can't be part of a pattern.
# For example, you can easily get a list of all Python and HTML files in
# directory /tmp or any subdirectory thereof, using:
#     thefiles = listFiles('/tmp', '*.py;*.htm;*.html')
#
def listFiles(root, patterns='*', recurse=True, return_folders=False):
    """Return the normalized paths under *root* whose names match *patterns*.

    root           -- directory at which the search starts.
    patterns       -- fnmatch-style patterns joined with ';'.  Matching is
                      done by fnmatch.fnmatch, so whether letter case
                      matters depends on the platform.
    recurse        -- when false, only *root* itself is examined.
    return_folders -- when true, every matching directory entry is
                      returned; otherwise only entries for which
                      os.path.isfile() is true.

    A root that cannot be listed yields an empty list (os.walk ignores
    listing errors by default).
    """
    # Expand patterns from semicolon-separated string to list
    pattern_list = patterns.split(';')
    results = []
    # os.walk is used because os.path.walk no longer exists in Python 3.
    for dirname, subdirs, filenames in os.walk(root):
        for name in subdirs + filenames:
            fullname = os.path.normpath(os.path.join(dirname, name))
            if not (return_folders or os.path.isfile(fullname)):
                continue
            if any(fnmatch.fnmatch(name, pattern) for pattern in pattern_list):
                results.append(fullname)
        # Block recursion if recursion was disallowed: the first tuple
        # produced by os.walk describes root itself.
        if not recurse:
            break
    return results


# Change this setting so that it finds the files (in twiki syntax) that
# you want to convert to tiddlywiki syntax:
file_list = listFiles('/home/pknaggs/hints/web/penlug/wiki', '*;')

# NOTE(review): every piece of angle-bracket text in the patterns below
# (the (?P<name> group names, <nop>, <verbatim>, </verbatim>, <p>, </p> and
# the lookbehind in wikiword_finder) was missing from the file as received.
# The values used here are reconstructed from the neighbouring comments and
# from the replacement strings; confirm them against the original revision.

# Search for links like [[http://www.url.org/][description]]
# and switch them to look like this [[description|http://www.url.org/]]
twiki_link_finder = re.compile(r'''
    \[\[                      # search for '[['
    (?P<the_url>[^\]]+?)      # non-greedy into group 'the_url' until non-']'
    \]\[                      # search for ']['
    (?P<the_descr>[^\]]+?)    # non-greedy into group 'the_descr' until non ']'
    \]\]                      # search for ']]'
    ''', re.VERBOSE)          # the parameter re.VERBOSE enables comments

# Search for subheading markers e.g. ---+++ and replace them with !!!
sub4 = re.compile(r'^---\+\+\+\+')
sub3 = re.compile(r'^---\+\+\+')
sub2 = re.compile(r'^---\+\+')
sub1 = re.compile(r'^---\+')

# Bullets are three spaces per nesting level followed by '* '.
bullet1 = re.compile(r'(^|[\n\r])[ ]{3}\*[ ]')
bullet2 = re.compile(r'(^|[\n\r])[ ]{6}\*[ ]')
bullet3 = re.compile(r'(^|[\n\r])[ ]{9}\*[ ]')

# To avoid words containing both upper and lower case becoming links,
# we must de-wikify them using <nop> in twiki, but ~ in tiddlywiki.
dewikify = re.compile(r'<nop>')

verbatim_begin = re.compile(r'''
    (?:[ ]*)      # non-group, greedy match of any number of spaces
    <verbatim>
    ''', re.VERBOSE)

verbatim_end = re.compile(r'</verbatim>')

inline_verbatim = re.compile(r'''
    =                         # search for '='
    (?P<the_verb>[^=]+?)      # non-greedy into group 'the_verb' until non-'='
    =                         # search for '='
    ''', re.VERBOSE)

# Find the words which will be converted to wikiwords by tiddlywiki, and add in
# a ~ in front of them so that they remain as normal text.
# This regular expression was copied from config.textPrimitives.wikiLink in the
# tiddlywiki source code (wget http://www.tiddlywiki.com/empty.html).
#
# The literal is a plain (non-raw) unicode string, so the \uXXXX escapes are
# resolved by Python itself and "\\-" reaches the regex engine as an escaped
# hyphen.  The earlier raw literal turned "\\-" into "backslash or hyphen",
# which made a backslash count as a lower-case letter.
#
# The lookbehind keeps words already prefixed with ~ (the output of the
# dewikify rule, which runs first) from being escaped twice, and skips
# capitalised fragments in the middle of a longer word.
wikiword_finder = re.compile(u'''
    (?<![A-Za-z0-9\u00c0-\u00de\u00df-\u00ff~])   # not after a letter, digit or '~'
    (?P<the_wikiword>                             # named group
      (?:                                         # non-group
        (?:                                       # non-group
          [A-Z\u00c0-\u00de]+                     # greedy one or more upperLetter
          [a-z\u00df-\u00ff_0-9\\-]+              # greedy one or more lowerLetter
          [A-Z\u00c0-\u00de]                      # one upperLetter
          [A-Za-z\u00c0-\u00de\u00df-\u00ff_0-9\\-]*   # greedy 0 or more anyLetter
        )
        |
        (?:                                       # non-group
          [A-Z\u00c0-\u00de]{2,}                  # two or more upperLetter
          [a-z\u00df-\u00ff_0-9\\-]+              # greedy one or more lowerLetter
        )
      )
    )
    ''', re.VERBOSE)          # the parameter re.VERBOSE enables comments

paragraph_begin = re.compile(r'<p>')     # search for '<p>'
paragraph_end = re.compile(r'</p>')      # search for '</p>'

# Substitutions applied, in this order, to every line outside a verbatim
# block.  The order matters: e.g. sub4 must run before sub3, and dewikify
# before wikiword_finder.
_MARKUP_RULES = (
    (twiki_link_finder, r'[[\g<the_descr>|\g<the_url>]]'),
    (sub4, r'!!!!'),
    (sub3, r'!!!'),
    (sub2, r'!!'),
    (sub1, r'!'),
    (dewikify, r'~'),
    (wikiword_finder, r'~\g<the_wikiword>'),
    (inline_verbatim, r'{{{\g<the_verb>}}}'),
    (paragraph_begin, r''),
    (paragraph_end, r'\n'),
    (bullet1, r'* '),
    (bullet2, r'** '),
    (bullet3, r'*** '),
)


def convert_line(line, verbatim_enabled):
    """Convert one line of twiki text to tiddlywiki syntax.

    line             -- the input line, including its line ending.
    verbatim_enabled -- True when the previous lines left a verbatim block
                        open.

    Returns a (converted_line, verbatim_enabled) tuple; the second item is
    the state to pass in for the next line.
    """
    if verbatim_begin.search(line):
        verbatim_enabled = True
        line = verbatim_begin.sub(r'{{{', line)
    if verbatim_end.search(line):
        verbatim_enabled = False
        line = verbatim_end.sub(r'}}}', line)
    # Text inside a verbatim block is copied through untouched.
    if not verbatim_enabled:
        for pattern, replacement in _MARKUP_RULES:
            line = pattern.sub(replacement, line)
    return line, verbatim_enabled


def convert_file(path):
    """Write the tiddlywiki version of the twiki file *path* to path + '.new'.

    Both files are opened as latin-1 with newline='' so that every byte,
    including the line endings, passes through unchanged except where a
    substitution applies.  The context managers close both files even if a
    line fails to convert.
    """
    with io.open(path, encoding='latin-1', newline='') as source:
        print('Processing ' + path)
        with io.open(path + '.new', 'w', encoding='latin-1',
                     newline='') as target:
            verbatim_enabled = False
            for line in source:
                line, verbatim_enabled = convert_line(line, verbatim_enabled)
                target.write(line)


for twiki_path in file_list:
    convert_file(twiki_path)