190 lines
5.2 KiB
Python
190 lines
5.2 KiB
Python
|
import yaml
|
||
|
import click
|
||
|
import os
|
||
|
import dotenv
|
||
|
import mistletoe
|
||
|
import re
|
||
|
import requests
|
||
|
import tempfile
|
||
|
import mimetypes
|
||
|
import base64
|
||
|
|
||
|
from xmlrpc.client import Binary
|
||
|
from urllib.parse import urlparse
|
||
|
|
||
|
# Upload results memoised for the lifetime of the process. Keys are the local
# image path (upload_image) or the source URL (reupload_image); values are the
# responses returned by the WordPress media upload call.
image_cache = {}
|
||
|
|
||
|
def upload_image(image_path):
    """Upload a local image file to WordPress and return the upload response.

    Responses are memoised in the module-level ``image_cache`` keyed by
    ``image_path``, so a given path is uploaded at most once per run.

    Args:
        image_path: Filesystem path of the image to upload.

    Returns:
        The value returned by ``wp.call(media.UploadFile(...))``. Callers in
        this file read its ``'url'`` and ``'id'`` entries.

    Raises:
        KeyError: If ``WP_ENDPOINT``, ``WP_USER`` or ``WP_PASSWORD`` is not
            set in the environment.
        OSError: If ``image_path`` cannot be opened for reading.
    """
    if image_path in image_cache:
        return image_cache[image_path]

    site_url = os.environ['WP_ENDPOINT']
    username = os.environ['WP_USER']
    password = os.environ['WP_PASSWORD']

    # Initialize the WordPress XML-RPC client
    wp = Client(site_url, username, password)

    # NOTE(review): guess_type() yields None for unknown extensions, in which
    # case None is sent as the upload's "type" -- confirm the server copes.
    mime = mimetypes.guess_type(image_path)[0]
    basename = os.path.basename(image_path)

    print("url", image_path)
    print("basename", basename)
    print("mimetype", mime)

    # Read the bytes up front so the file handle is released before the
    # (potentially slow) network call instead of staying open throughout it.
    with open(image_path, 'rb') as f:
        payload = f.read()

    uploaded = wp.call(media.UploadFile({
        "name": basename,
        "type": mime,
        "bits": Binary(payload),
    }))

    image_cache[image_path] = uploaded
    return uploaded
|
||
|
|
||
|
def reupload_image(image_url):
    """Download a remote image and upload it to WordPress.

    Responses are memoised in the module-level ``image_cache`` keyed by
    ``image_url``, so a given URL is fetched and uploaded at most once per
    run. After uploading, the downloaded bytes are also written to a file
    named after the URL's basename in the current working directory.

    Args:
        image_url: Absolute URL of the image to fetch.

    Returns:
        The value returned by ``wp.call(media.UploadFile(...))``. Callers in
        this file read its ``'url'`` and ``'id'`` entries.

    Raises:
        KeyError: If ``WP_ENDPOINT``, ``WP_USER`` or ``WP_PASSWORD`` is not
            set in the environment.
        requests.RequestException: If the download fails, times out, or the
            server answers with an HTTP error status.
    """
    if image_url in image_cache:
        return image_cache[image_url]

    site_url = os.environ['WP_ENDPOINT']
    username = os.environ['WP_USER']
    password = os.environ['WP_PASSWORD']

    # Initialize the WordPress XML-RPC client
    wp = Client(site_url, username, password)

    # A timeout keeps a stalled server from hanging the whole run, and
    # raise_for_status() stops an HTTP error page being uploaded as an image.
    r = requests.get(image_url, timeout=30)
    r.raise_for_status()

    url = urlparse(image_url)
    # Use the URL path only, so query strings do not end up in the file name.
    basename = os.path.basename(url.path)
    mime = mimetypes.guess_type(image_url)[0]

    print("url", url)
    print("basename", basename)
    print("mimetype", mime)

    uploaded = wp.call(media.UploadFile({
        "name": basename,
        "type": mime,
        "bits": Binary(r.content),
    }))

    # NOTE(review): this keeps a local copy in the current directory; it looks
    # like a debugging aid -- confirm before removing.
    with open(basename, 'wb') as f:
        print("write content")
        f.write(r.content)

    image_cache[image_url] = uploaded
    return uploaded
|
||
|
|
||
|
def replace_figure_tags(markdown_file, html_string):
    """Rewrite Hugo ``figure`` shortcodes as HTML ``<figure>`` elements.

    Each ``{{<figure src="..." alt="..." caption="...">}}`` shortcode (``alt``
    and ``caption`` optional, in that order) becomes a ``<figure>`` block
    whose ``<img>`` points at the image's WordPress URL. Remote images are
    fetched and re-uploaded; local paths are resolved relative to the
    directory containing ``markdown_file`` and uploaded.

    Args:
        markdown_file: Path of the Markdown file the text came from; used to
            resolve relative image paths.
        html_string: The text to transform.

    Returns:
        The text with every matching figure tag replaced.
    """
    html_string = html_string.replace("{{<figure", "<figure")
    # NOTE: this rewrites every ">}}" in the text, not only the ones that
    # close a figure shortcode.
    html_string = html_string.replace(">}}", ">")

    # Matches the <figure> tag with optional alt and caption attributes.
    pattern = r'<figure src="([^"]+)"(?: alt="([^"]+)")?(?: caption="([^"]+)")?>'
    post_dir = os.path.dirname(markdown_file)

    def replace_figure(match):
        """Build the replacement markup for one matched figure tag."""
        src, alt, caption = match.groups()

        # Require a real http(s) scheme: a bare "http" prefix test would send
        # local files such as "httpfoo.png" to the downloader.
        if src.lower().startswith(('http://', 'https://')):
            new_url = reupload_image(src)['url']
        else:
            path = os.path.abspath(os.path.join(post_dir, src))
            new_url = upload_image(path)['url']

        pieces = ['<figure>\n', f' <img src="{new_url}"']
        if alt:
            pieces.append(f' alt="{alt}"')
        pieces.append('>\n')
        if caption:
            pieces.append(f'<figcaption>{caption}</figcaption>')
        pieces.append('</figure>')
        return ''.join(pieces)

    return re.sub(pattern, replace_figure, html_string)
|
||
|
|
||
|
|
||
|
|
||
|
from wordpress_xmlrpc import Client, WordPressPost
|
||
|
from wordpress_xmlrpc.methods import posts, media
|
||
|
|
||
|
|
||
|
|
||
|
@click.command()
@click.argument('markdown_files', nargs=-1, type=click.Path(file_okay=True, dir_okay=False))
def upload_hugo_posts_to_wordpress(markdown_files):
    """Upload Hugo Markdown posts to WordPress over XML-RPC.

    For each file, the YAML front matter supplies the slug (``url``), title
    and date; the body has its figure shortcodes rewritten, is rendered to
    HTML and is submitted as a new post. An optional ``thumbnail`` entry is
    uploaded and attached to the post.

    The endpoint and credentials are read from WP_ENDPOINT, WP_USER and
    WP_PASSWORD, after loading a .env file via python-dotenv.
    """
    dotenv.load_dotenv()

    # Configure your WordPress site URL and credentials
    site_url = os.environ['WP_ENDPOINT']
    username = os.environ['WP_USER']
    password = os.environ['WP_PASSWORD']

    # Initialize the WordPress XML-RPC client
    wp = Client(site_url, username, password)

    for markdown_file in markdown_files:
        with open(markdown_file, 'r', encoding='utf-8') as file:
            content = file.read()

        # Separate header from body. The text before the first "---" line is
        # discarded; fail with the file name rather than an opaque unpacking
        # error when the delimiters are missing.
        sections = content.split("---\n", maxsplit=2)
        if len(sections) != 3:
            raise ValueError(
                f"{markdown_file}: expected YAML front matter delimited by '---' lines"
            )
        _, frontmatter, body = sections

        fm = yaml.safe_load(frontmatter)
        print(fm)

        body = replace_figure_tags(markdown_file, body)

        # Create a WordPress post
        post = WordPressPost()
        post.slug = fm['url']
        post.title = fm['title']
        post.date = fm['date']
        post.content = mistletoe.markdown(body)

        if 'thumbnail' in fm:
            thumbnail = fm['thumbnail']
            # Require a real http(s) scheme so that local files whose names
            # merely begin with "http" are not treated as URLs.
            if thumbnail.lower().startswith(('http://', 'https://')):
                post.thumbnail = reupload_image(thumbnail)['id']
            else:
                relpath = os.path.join(os.path.dirname(markdown_file), thumbnail)
                post.thumbnail = upload_image(os.path.abspath(relpath))['id']

        print(post.content)
        # Add additional fields like categories, tags, etc. as needed

        # Submit the post to WordPress
        wp.call(posts.NewPost(post))
|
||
|
|
||
|
# Run the click command only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    upload_hugo_posts_to_wordpress()
|