wpconvert/convert.py

190 lines
5.2 KiB
Python

import yaml
import click
import os
import dotenv
import mistletoe
import re
import requests
import tempfile
import mimetypes
import base64
from xmlrpc.client import Binary
from urllib.parse import urlparse
# Per-run memo of media uploads, so an image referenced more than once is only
# uploaded once. Keys are the absolute local path (upload_image) or the source
# URL (reupload_image); values are the results of the media.UploadFile call.
image_cache = {}
def upload_image(image_path):
    """Upload a local image file to the WordPress media library.

    Results are memoised in the module-level ``image_cache`` so that a file
    referenced several times is uploaded only once per run.

    Args:
        image_path: Filesystem path of the image to upload. It is also used
            as the cache key.

    Returns:
        The result of the ``media.UploadFile`` XML-RPC call. Callers in this
        file read its ``'id'`` and ``'url'`` entries.

    Raises:
        KeyError: If ``WP_ENDPOINT``, ``WP_USER`` or ``WP_PASSWORD`` is not
            set in the environment.
        OSError: If ``image_path`` cannot be opened for reading.
    """
    if image_path in image_cache:
        return image_cache[image_path]

    # Initialize the WordPress XML-RPC client from the environment.
    wp = Client(
        os.environ['WP_ENDPOINT'],
        os.environ['WP_USER'],
        os.environ['WP_PASSWORD'],
    )

    # guess_type() yields None for an unknown extension; fall back to a
    # generic binary type so the payload always carries a MIME string.
    mime = mimetypes.guess_type(image_path)[0] or 'application/octet-stream'
    basename = os.path.basename(image_path)
    print("url", image_path)
    print("basename", basename)
    print("mimetype", mime)

    with open(image_path, 'rb') as f:
        thumb = wp.call(media.UploadFile({
            "name": basename,
            "type": mime,
            "bits": Binary(f.read())
        }))

    image_cache[image_path] = thumb
    return thumb
def reupload_image(image_url):
    """Download a remote image and upload it to the WordPress media library.

    Results are memoised in the module-level ``image_cache`` so that a URL
    referenced several times is fetched and uploaded only once per run.

    Side effect: the downloaded bytes are also written to a file named after
    the URL's basename in the current working directory.

    Args:
        image_url: Absolute URL of the image. It is also used as the cache
            key.

    Returns:
        The result of the ``media.UploadFile`` XML-RPC call. Callers in this
        file read its ``'id'`` and ``'url'`` entries.

    Raises:
        KeyError: If ``WP_ENDPOINT``, ``WP_USER`` or ``WP_PASSWORD`` is not
            set in the environment.
        requests.RequestException: If the download fails, times out, or the
            server answers with an HTTP error status.
    """
    if image_url in image_cache:
        return image_cache[image_url]

    # Initialize the WordPress XML-RPC client from the environment.
    wp = Client(
        os.environ['WP_ENDPOINT'],
        os.environ['WP_USER'],
        os.environ['WP_PASSWORD'],
    )

    # Bound the wait and refuse error responses: without the status check a
    # 404/500 error page would be uploaded as if it were the image.
    r = requests.get(image_url, timeout=30)
    r.raise_for_status()

    url = urlparse(image_url)
    basename = os.path.basename(url.path)
    # Guess from the path component only, so a query string or fragment
    # cannot hide the extension. If the extension is unknown, trust the
    # server's Content-Type, then a generic binary type.
    mime = (
        mimetypes.guess_type(basename)[0]
        or r.headers.get('Content-Type', '').split(';')[0].strip()
        or 'application/octet-stream'
    )
    print("url", url)
    print("basename", basename)
    print("mimetype", mime)

    thumb = wp.call(media.UploadFile({
        "name": basename,
        "type": mime,
        "bits": Binary(r.content)
    }))

    # NOTE(review): this keeps a local copy of the download in the current
    # working directory. It looks like a debugging aid; confirm it is still
    # wanted before removing it.
    with open(basename, 'wb') as f:
        print("write content")
        f.write(r.content)

    image_cache[image_url] = thumb
    return thumb
def replace_figure_tags(markdown_file, html_string):
    """Rewrite Hugo ``figure`` shortcodes as HTML ``<figure>`` elements.

    Every figure with a ``src`` (optionally followed by ``alt`` and
    ``caption``, in that order) has its image uploaded to WordPress and is
    replaced by a ``<figure>`` block that points at the uploaded copy.

    Args:
        markdown_file: Path of the post being converted. Relative image
            paths are resolved against its directory.
        html_string: The post body to transform.

    Returns:
        The body with the matching figures replaced. Text that is not a
        figure shortcode, including other shortcodes, is returned unchanged.
    """
    # Unwrap figure shortcodes only, tolerating the usual Hugo spacing
    # (`{{< figure ... >}}`). A blanket replace of ">}}" would also mangle
    # the closing delimiter of every other shortcode in the post.
    html_string = re.sub(
        r'\{\{<\s*figure\b(.*?)\s*>\}\}',
        r'<figure\1>',
        html_string,
    )

    # A <figure> tag carrying src, then optional alt and caption attributes.
    pattern = (
        r'<figure\s+src="([^"]+)"'
        r'(?:\s+alt="([^"]+)")?'
        r'(?:\s+caption="([^"]+)")?\s*>'
    )

    def replace_figure(match):
        """Upload the matched figure's image and build its replacement HTML."""
        src, alt, caption = match.groups()

        # Match the full scheme so that a local file whose name merely
        # starts with "http" is not mistaken for a remote URL.
        if src.startswith(('http://', 'https://')):
            new_url = reupload_image(src)['url']
        else:
            relpath = os.path.join(os.path.dirname(markdown_file), src)
            new_url = upload_image(os.path.abspath(relpath))['url']

        replacement = '<figure>\n'
        replacement += f' <img src="{new_url}"'
        if alt:
            replacement += f' alt="{alt}"'
        replacement += '>\n'
        if caption:
            replacement += f'<figcaption>{caption}</figcaption>'
        replacement += '</figure>'
        return replacement

    return re.sub(pattern, replace_figure, html_string)
from wordpress_xmlrpc import Client, WordPressPost
from wordpress_xmlrpc.methods import posts, media
@click.command()
@click.argument('markdown_files', nargs=-1, type=click.Path(file_okay=True, dir_okay=False))
def upload_hugo_posts_to_wordpress(markdown_files):
    """Convert Hugo Markdown posts and create them as WordPress posts.

    Each file in MARKDOWN_FILES must start with YAML front matter delimited
    by '---' lines and providing 'url', 'title' and 'date'; 'thumbnail' is
    optional. Images referenced by the post are uploaded to the media
    library first. The WP_ENDPOINT, WP_USER and WP_PASSWORD settings are
    read from the environment after loading a .env file.
    """
    dotenv.load_dotenv()

    # Initialize the WordPress XML-RPC client from the environment.
    wp = Client(
        os.environ['WP_ENDPOINT'],
        os.environ['WP_USER'],
        os.environ['WP_PASSWORD'],
    )

    for markdown_file in markdown_files:
        with open(markdown_file, 'r', encoding='utf-8') as file:
            content = file.read()

        # Separate the front matter from the body. The text before the first
        # delimiter is discarded, and maxsplit keeps any later '---' lines
        # inside the body.
        parts = content.split("---\n", maxsplit=2)
        if len(parts) != 3:
            # Report which file is malformed instead of failing with an
            # opaque tuple-unpacking error.
            raise click.ClickException(
                f"{markdown_file}: no '---' delimited YAML front matter found"
            )
        _, frontmatter, body = parts
        fm = yaml.safe_load(frontmatter)
        print(fm)

        # Upload the images referenced by figures and point the body at the
        # uploaded copies before rendering the Markdown.
        body = replace_figure_tags(markdown_file, body)

        # Create a WordPress post
        post = WordPressPost()
        post.slug = fm['url']
        post.title = fm['title']
        post.date = fm['date']
        post.content = mistletoe.markdown(body)

        # An absent or empty thumbnail key means "no featured image".
        thumbnail = fm.get('thumbnail')
        if thumbnail:
            # Match the full scheme so that a local file whose name merely
            # starts with "http" is not mistaken for a remote URL.
            if thumbnail.startswith(('http://', 'https://')):
                post.thumbnail = reupload_image(thumbnail)['id']
            else:
                relpath = os.path.join(os.path.dirname(markdown_file), thumbnail)
                post.thumbnail = upload_image(os.path.abspath(relpath))['id']
        print(post.content)

        # NOTE(review): post_status is never set here, so the status of the
        # created post is left to the library/server default. Confirm that
        # is intended.
        wp.call(posts.NewPost(post))
# Run the click command only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    upload_hugo_posts_to_wordpress()