wpconvert/convert.py

190 lines
5.2 KiB
Python
Raw Normal View History

2023-10-31 07:15:13 +00:00
import yaml
import click
import os
import dotenv
import mistletoe
import re
import requests
import tempfile
import mimetypes
import base64
from xmlrpc.client import Binary
from urllib.parse import urlparse
# Maps a local image path or remote image URL to the upload result returned by
# WordPress, so the same image is not uploaded twice while the script runs.
image_cache = {}
def upload_image(image_path):
    """Upload a local image file to the WordPress media library.

    The result is stored in the module-level ``image_cache`` under
    ``image_path``; a later call with the same path returns the stored
    result without contacting WordPress again.

    Args:
        image_path: Filesystem path of the image to upload.

    Returns:
        Whatever the ``media.UploadFile`` XML-RPC call returned. Callers in
        this file read its ``'url'`` and ``'id'`` entries.

    Raises:
        KeyError: If ``WP_ENDPOINT``, ``WP_USER`` or ``WP_PASSWORD`` is not
            set in the environment.
        OSError: If ``image_path`` cannot be opened for reading.
    """
    try:
        return image_cache[image_path]
    except KeyError:
        pass  # Not uploaded yet; fall through and do the work.

    # Connection settings come from the environment.
    wp = Client(
        os.environ['WP_ENDPOINT'],
        os.environ['WP_USER'],
        os.environ['WP_PASSWORD'],
    )

    content_type = mimetypes.guess_type(image_path)[0]
    file_name = os.path.basename(image_path)
    print("url", image_path)
    print("basename", file_name)
    print("mimetype", content_type)

    with open(image_path, 'rb') as fh:
        payload = {
            "name": file_name,
            "type": content_type,
            "bits": Binary(fh.read()),
        }
        uploaded = wp.call(media.UploadFile(payload))

    image_cache[image_path] = uploaded
    return uploaded
def reupload_image(image_url):
    """Download a remote image and upload it to the WordPress media library.

    The result is stored in the module-level ``image_cache`` under
    ``image_url``; a later call with the same URL returns the stored result
    without downloading or uploading again. As a side effect, the downloaded
    bytes are also written to a file named after the URL's base name in the
    current working directory.

    Args:
        image_url: Absolute HTTP(S) URL of the image to fetch.

    Returns:
        Whatever the ``media.UploadFile`` XML-RPC call returned. Callers in
        this file read its ``'url'`` and ``'id'`` entries.

    Raises:
        KeyError: If ``WP_ENDPOINT``, ``WP_USER`` or ``WP_PASSWORD`` is not
            set in the environment.
        requests.RequestException: If the download times out, fails, or the
            server answers with an HTTP error status.
    """
    if image_url in image_cache:
        return image_cache[image_url]

    site_url = os.environ['WP_ENDPOINT']
    username = os.environ['WP_USER']
    password = os.environ['WP_PASSWORD']

    # Initialize the WordPress XML-RPC client
    wp = Client(site_url, username, password)

    # A timeout keeps one unresponsive host from hanging the whole run, and
    # the status check stops an error page from being uploaded (and cached)
    # as if it were the image.
    r = requests.get(image_url, timeout=60)
    r.raise_for_status()

    url = urlparse(image_url)
    basename = os.path.basename(url.path)
    # Guess from the path's base name rather than the raw URL so that a query
    # string or fragment cannot hide the file extension.
    mime = mimetypes.guess_type(basename)[0]
    print("url", url)
    print("basename", basename)
    print("mimetype", mime)

    thumb = wp.call(media.UploadFile({
        "name": basename,
        "type": mime,
        "bits": Binary(r.content)
    }))

    # Keep a local copy of the downloaded image next to where the script runs.
    with open(basename, 'wb') as f:
        print("write content")
        f.write(r.content)

    image_cache[image_url] = thumb
    return image_cache[image_url]
def replace_figure_tags(markdown_file, html_string):
    """Turn Hugo ``figure`` shortcodes into HTML ``<figure>`` elements.

    Every ``{{<figure`` is first rewritten to ``<figure`` and every ``>}}``
    to ``>``. Tags of the form ``<figure src="..." [alt="..."]
    [caption="..."]>`` are then replaced by a ``<figure>`` element wrapping
    an ``<img>``. The image itself is uploaded to WordPress and the ``src``
    of the generated ``<img>`` is the URL reported by that upload.

    Args:
        markdown_file: Path of the Markdown file the text came from; relative
            image paths are resolved against its directory.
        html_string: Text that may contain figure shortcodes.

    Returns:
        The text with all matching figure tags replaced.
    """
    # Drop the shortcode delimiters so the tags can be matched as plain HTML.
    text = html_string.replace("{{<figure", "<figure").replace(">}}", ">")

    # src is mandatory; alt and caption are optional and must come in that order.
    figure_re = r'<figure src="([^"]+)"(?: alt="([^"]+)")?(?: caption="([^"]+)")?>'

    def _render(match):
        """Upload the matched image and build its replacement markup."""
        src, alt, caption = match.groups()

        if src.startswith('http'):
            # Remote image: fetch it and push it to WordPress.
            uploaded = reupload_image(src)
        else:
            # Local image: resolve relative to the Markdown file's directory.
            joined = os.path.join(os.path.dirname(markdown_file), src)
            uploaded = upload_image(os.path.abspath(joined))
        new_url = uploaded['url']

        pieces = ['<figure>\n', f' <img src="{new_url}"']
        if alt:
            pieces.append(f' alt="{alt}"')
        pieces.append('>\n')
        if caption:
            pieces.append(f'<figcaption>{caption}</figcaption>')
        pieces.append('</figure>')
        return ''.join(pieces)

    return re.sub(figure_re, _render, text)
from wordpress_xmlrpc import Client, WordPressPost
from wordpress_xmlrpc.methods import posts, media
@click.command()
@click.argument('markdown_files', nargs=-1, type=click.Path(file_okay=True, dir_okay=False))
def upload_hugo_posts_to_wordpress(markdown_files):
    """Publish Hugo Markdown posts to a WordPress site over XML-RPC.

    Connection settings are read from the WP_ENDPOINT, WP_USER and
    WP_PASSWORD environment variables (a .env file is loaded first).
    \f

    Args:
        markdown_files: Paths of the Markdown files to publish. Each file
            must contain YAML front matter delimited by ``---`` lines with
            at least the keys ``url``, ``title`` and ``date``; ``thumbnail``
            is optional.

    Raises:
        click.ClickException: If a file has no ``---``-delimited front
            matter or its front matter is not a YAML mapping.
        KeyError: If a required environment variable or front matter key
            is missing.
    """
    dotenv.load_dotenv()

    # Configure your WordPress site URL and credentials
    site_url = os.environ['WP_ENDPOINT']
    username = os.environ['WP_USER']
    password = os.environ['WP_PASSWORD']

    # Initialize the WordPress XML-RPC client
    wp = Client(site_url, username, password)

    for markdown_file in markdown_files:
        with open(markdown_file, 'r', encoding='utf-8') as file:
            content = file.read()

        # Separate header from body. A well-formed post splits into exactly
        # three parts: whatever precedes the first '---', the front matter,
        # and the body.
        parts = content.split("---\n", maxsplit=2)
        if len(parts) != 3:
            raise click.ClickException(
                f"{markdown_file}: no '---' delimited front matter found"
            )
        _, frontmatter, body = parts

        fm = yaml.safe_load(frontmatter)
        if not isinstance(fm, dict):
            # Empty or scalar front matter would otherwise fail later with an
            # unhelpful TypeError on the first key lookup.
            raise click.ClickException(
                f"{markdown_file}: front matter is not a YAML mapping"
            )
        print(fm)

        # Figure shortcodes are rewritten (and their images uploaded) before
        # the Markdown is rendered.
        body = replace_figure_tags(markdown_file, body)

        # Create a WordPress post
        post = WordPressPost()
        post.slug = fm['url']
        post.title = fm['title']
        post.date = fm['date']
        post.content = mistletoe.markdown(body)

        if 'thumbnail' in fm:
            if fm['thumbnail'].startswith('http'):
                post.thumbnail = reupload_image(fm['thumbnail'])['id']
            else:
                # Local thumbnails are resolved relative to the post's file.
                relpath = os.path.join(os.path.dirname(markdown_file), fm['thumbnail'])
                path = os.path.abspath(relpath)
                post.thumbnail = upload_image(path)['id']

        print(post.content)

        # Publish the post to WordPress
        wp.call(posts.NewPost(post))
# Run the click command only when this file is executed as a script.
if __name__ == '__main__':
    upload_hugo_posts_to_wordpress()