問題①
時刻がずれる。(9時間進んでしまう)
これは、元のスクリプトがローカル時刻で出力するにもかかわらず、UTCであると宣言しているため。日本時間(UTC+9)を考慮するように変更。
問題②
MovableTypeで記事を作成したときのフォーマットによっては、改行が無駄に入ってしまう。
網羅していないフォーマット種別はあるが、分かる範囲で改行追加の有無を判断するようにした。
--- google-blog-converters-r89/src/movabletype2blogger/mt2b.py 2010-05-28 12:21:06.000000000 +0900
+++ google-blog-converters-r89_new/src/movabletype2blogger/mt2b.py 2020-06-20 21:32:30.372455400 +0900
@@ -20,6 +20,7 @@
import re
import sys
import time
+import datetime
from xml.sax.saxutils import escape
import gdata
@@ -27,6 +28,16 @@
__author__ = 'JJ Lueck (jlueck@gmail.com)'
+timezone_gap = 9 # The original script labels its timestamps as UTC although they are actually local time,
+ # and Blogger seems to take the user's timezone into account when
+ # importing entries. Thus, when importing from the Japanese timezone (UTC+9), entries
+ # end up with the timestamp of the imported data + 9 hours.
+ # E.g. MovableType timestamp: 05/21/2020 09:33:30 PM
+ # Output of original script: 2020-05-21T21:33:30Z (when run in Japan)
+ # Timestamp on Blogger: 2020/05/22 06:33 (when imported from Japan)
+ # To avoid this, the timezone offset needs to be compensated for.
+ # This value is the "+9" of "UTC+9" (Japan Standard Time).
+
########################
# Constants
########################
@@ -114,15 +125,31 @@
last_entry = None # The previous post atom.Entry if exists
tag_name = None # The current name of multi-line values
tag_contents = '' # The contents of multi-line values
+ linenum = 0 # Number of line being processed
+ tag_separator = '' # keep the line with '-' only because it might be
+ # treated as a tag separator ('-' * 8 or '-' * 5) wrongly
+ tag_name_back = tag_name # In case of wrong treatment of tag separator, copy tag_name
+ #_EXTENDED_BODY_SEPARATOR_ = '<br /><a name=\'more\'></a>'
+ # blogger uses "<!--more-->" as the separator and it is converted to
+ # the above when exported. But it will be imported as
+ # "<a href="https://www.blogger.com/null" name="more"></a>", instead
+ # of original separator - "<!--more-->".
+ _EXTENDED_BODY_SEPARATOR_ = '<!--more-->'
+ extended_body_separator = _EXTENDED_BODY_SEPARATOR_
+ # Also used as a flag indicating separator of extended body from
+ # main body is added into output or not.
+ convert_breaks = '<br/>' # flag to mark if <br/> need be added or not
# Loop through the text lines looking for key/value pairs
for line in infile:
+ linenum+=1
# Remove whitespace
line = line.strip().lstrip(codecs.BOM_UTF8)
# Check for the post ending token
if line == '-' * 8:
+ tag_separator = line
if post_entry:
# If the body tag is still being read, add what has been read.
if tag_name == 'BODY':
@@ -137,11 +164,12 @@
post_entry = None
comment_entry = None
tag_name = None
- tag_contents = ''
+ tag_contents = '\n'
continue
# Check for the tag ending separator
elif line == '-' * 5:
+ tag_separator = line
# Get the contents of the body and set the entry contents
if tag_name == 'BODY':
post_entry.content = atom.Content(
@@ -162,9 +190,10 @@
# entry contents
elif tag_name == 'EXTENDED BODY':
if post_entry:
- post_entry.content.text += '<br/>' + self._TranslateContents(tag_contents)
+ post_entry.content.text += extended_body_separator + self._TranslateContents(tag_contents)
elif last_entry and last_entry.content:
- last_entry.content.text += '<br/>' + self._TranslateContents(tag_contents)
+ last_entry.content.text += extended_body_separator + self._TranslateContents(tag_contents)
+ extended_body_separator = convert_breaks
# Convert any keywords (comma separated values) into Blogger labels
elif tag_name == 'KEYWORDS':
@@ -175,6 +204,7 @@
atom.Category(scheme=CATEGORY_NS, term=keyword))
# Reset the current tag and its contents
+ tag_name_back = tag_name
tag_name = None
tag_contents = ''
continue
@@ -261,20 +291,59 @@
# on following lines
elif key in ('COMMENT', 'BODY', 'EXTENDED BODY', 'EXCERPT', 'KEYWORDS', 'PING'):
tag_name = key
+ tag_separator = ''
+ extended_body_separator = _EXTENDED_BODY_SEPARATOR_
# These lines can be safely ignored
- elif key in ('BASENAME', 'ALLOW COMMENTS', 'CONVERT BREAKS',
+ elif key in ('BASENAME', 'ALLOW COMMENTS', 'CONVERT BREAKS',
'ALLOW PINGS', 'PRIMARY CATEGORY', 'IP', 'URL', 'EMAIL'):
+ tag_separator = ''
+ extended_body_separator = _EXTENDED_BODY_SEPARATOR_
+
+ if key in ('CONVERT BREAKS'):
+ if value in ( '__default__', 'markdown', 'markdown_with_smartypants','textile_2'):
+ convert_breaks = '<br/>'
+ elif ( value == '0'):
+ convert_breaks = ''
+ else:
+ convert_breaks = '<br/>'
+ sys.stderr.write('Warn: ' + str(linenum) + ': cannot be processed. Ignored. "' + line + '"\n')
continue
- # If the line is empty and we're processing the body, add an HTML line
- # break
- elif tag_name == 'BODY' and len(line) == 0:
- tag_contents += '<br/>'
+ # If we're processing the body, extended body, or comment, add the line
+ # because it would be a part of content.
+ # Note: Excerpt would have the same situation, but blogger does not have excerpt field and this program will ignore it.
+ elif tag_name in ('BODY', 'EXTENDED BODY', 'COMMENT', 'KEYWORDS'):
+ #sys.stderr.write('Normal: ' + str(linenum) + ': assumed the line is part of "' + tag_name+ '" content: "' + line + '"\n')
+ if tag_name in ('BODY', 'EXTENDED BODY', 'COMMENT'):
+ tag_contents += line + convert_breaks
+ else:
+ if tag_contents != "":
+ tag_contents += ","
+ tag_contents += line
+
+ # EXCERPT and PING (=Trackback) are ignored because they are not supported by Blogger.
+ elif tag_name in ('EXCERPT', 'PING') and len(line)!=0:
+ sys.stderr.write('Warn: ' + str(linenum) + ': "' +tag_name+ '" is ignored by blogger: "' + line + '"\n')
# This would be a line of content beyond a key/value pair
- elif len(key) != 0:
- tag_contents += line + '\n'
+ # Add back the '-----' or '--------' line located 1 line above this line, because it would be
+ # treated as a tag separator and omitted unexpectedly.
+ elif len(key) != 0: #and not ( tag_name in ('EXCERPT', 'PING') ):
+ if (tag_separator != ''):
+ if ( not tag_name ):
+ tag_name = tag_name_back
+ sys.stderr.write('Warn: ' + str(linenum) + ': need recover tag_name to "' + tag_name +'": "' + line + '"\n')
+ tag_contents += tag_separator + convert_breaks
+ sys.stderr.write('Warn: ' + str(linenum) + ': "' + tag_separator + '" was added back to previous line: "' + line + '"\n')
+ tag_separator = ''
+ sys.stderr.write('Warn: ' + str(linenum) + ': assumed the line is part of "' + tag_name+ '" content: "' + line + '"\n')
+ tag_contents += line + convert_breaks
+
+ # Lines unable to process
+ elif line != '':
+ #else:
+ sys.stderr.write('Error: ' + str(linenum) + ': cannot process line: "' + line + '"\n')
# Update the feed with the last updated time
@@ -307,7 +376,8 @@
def _FromMtTime(self, mt_time):
try:
- return time.strptime(mt_time, "%m/%d/%Y %I:%M:%S %p")
+ dt3 = datetime.datetime.strptime(mt_time, "%m/%d/%Y %I:%M:%S %p") + datetime.timedelta(hours = -1 * timezone_gap)
+ return dt3.timetuple()
except ValueError:
return time.gmtime()
0 件のコメント:
新しいコメントは書き込めません。