initial commit

This commit is contained in:
James Ravenscroft 2023-10-31 07:15:13 +00:00
commit e47b2f10d5
2 changed files with 234 additions and 0 deletions

190
convert.py Normal file
View File

@ -0,0 +1,190 @@
import yaml
import click
import os
import dotenv
import mistletoe
import re
import requests
import tempfile
import mimetypes
import base64
from xmlrpc.client import Binary
from urllib.parse import urlparse
# Maps an image path or URL to the response returned by the WordPress media
# upload, so an image referenced several times in one run is uploaded once.
image_cache = {}


def upload_image(image_path):
    """Upload a local image file to WordPress and return the upload response.

    Results are memoised in ``image_cache`` keyed by ``image_path``; a repeat
    call with the same path returns the cached response without any network
    or file access.

    Args:
        image_path: Path of the image file to upload.

    Returns:
        Whatever ``wp.call(media.UploadFile(...))`` returned. Callers in this
        file index it with ``'id'`` and ``'url'``.

    Raises:
        KeyError: If WP_ENDPOINT, WP_USER or WP_PASSWORD is not set in the
            environment.
        OSError: If ``image_path`` cannot be opened for reading.
    """
    if image_path in image_cache:
        return image_cache[image_path]

    # Initialize the WordPress XML-RPC client
    wp = Client(
        os.environ['WP_ENDPOINT'],
        os.environ['WP_USER'],
        os.environ['WP_PASSWORD'],
    )

    basename = os.path.basename(image_path)
    # guess_type() yields None for unknown extensions; send a generic binary
    # type rather than passing None through as the media type.
    mime = mimetypes.guess_type(image_path)[0] or 'application/octet-stream'
    print("url", image_path)
    print("basename", basename)
    print("mimetype", mime)

    with open(image_path, 'rb') as f:
        bits = Binary(f.read())

    thumb = wp.call(media.UploadFile({
        "name": basename,
        "type": mime,
        "bits": bits,
    }))
    image_cache[image_path] = thumb
    return thumb
def reupload_image(image_url):
    """Download a remote image, upload it to WordPress, and keep a local copy.

    Results are memoised in ``image_cache`` keyed by ``image_url``. After a
    successful upload the downloaded bytes are also written to a file named
    after the URL's basename in the current working directory.

    Args:
        image_url: Absolute URL of the image to fetch.

    Returns:
        Whatever ``wp.call(media.UploadFile(...))`` returned. Callers in this
        file index it with ``'id'`` and ``'url'``.

    Raises:
        ValueError: If no file name can be derived from the URL path.
        KeyError: If WP_ENDPOINT, WP_USER or WP_PASSWORD is not set in the
            environment.
        requests.RequestException: If the download fails, times out, or the
            server answers with an HTTP error status.
    """
    if image_url in image_cache:
        return image_cache[image_url]

    url = urlparse(image_url)
    basename = os.path.basename(url.path)
    if not basename:
        # Fail before any network traffic: an empty name cannot be used for
        # the local copy and would leave an orphaned upload behind.
        raise ValueError(f"cannot derive a file name from image URL: {image_url}")

    # Initialize the WordPress XML-RPC client
    wp = Client(
        os.environ['WP_ENDPOINT'],
        os.environ['WP_USER'],
        os.environ['WP_PASSWORD'],
    )

    r = requests.get(image_url, timeout=30)
    # Without this an HTML error page would be uploaded as if it were the image.
    r.raise_for_status()

    # Guess from the path only (no query string); fall back to what the server
    # reported, and finally to a generic binary type.
    mime = mimetypes.guess_type(url.path)[0]
    if mime is None:
        reported = r.headers.get('Content-Type', '')
        mime = reported.split(';', 1)[0].strip() or 'application/octet-stream'

    print("url", url)
    print("basename", basename)
    print("mimetype", mime)

    thumb = wp.call(media.UploadFile({
        "name": basename,
        "type": mime,
        "bits": Binary(r.content),
    }))

    with open(basename, 'wb') as f:
        print("write content")
        f.write(r.content)

    image_cache[image_url] = thumb
    return thumb
def replace_figure_tags(markdown_file, html_string):
    """Replace Hugo ``figure`` shortcodes with plain HTML ``<figure>`` markup.

    Each matched figure has its image uploaded to WordPress (via
    ``reupload_image`` for http(s) URLs, ``upload_image`` for paths relative
    to ``markdown_file``) and its ``src`` rewritten to the uploaded URL.

    Recognised forms are ``{{<figure src="..." alt="..." caption="...">}}``,
    the spaced variant ``{{< figure ... >}}``, and a bare
    ``<figure src="...">`` tag. ``alt`` and ``caption`` are optional but must
    appear in that order after ``src``. Anything else, including other
    shortcodes and figure shortcodes with unsupported attributes, is returned
    untouched.

    Args:
        markdown_file: Path of the post being converted; relative image
            paths are resolved against its directory.
        html_string: The post body text to scan.

    Returns:
        The body text with every recognised figure replaced.
    """
    # Match the shortcode delimiters as part of the pattern instead of
    # blanket-replacing ">}}", which used to mangle every other shortcode.
    pattern = (
        r'(?:\{\{<\s*|<)figure\s+src="([^"]+)"'
        r'(?:\s+alt="([^"]+)")?'
        r'(?:\s+caption="([^"]+)")?'
        r'\s*(?:>\}\}|>)'
    )

    def replace_figure(match):
        """Build the HTML replacement for one matched figure."""
        src, alt, caption = match.groups()

        # Test for a real scheme so a local file such as "http-flow.png" is
        # not mistaken for a remote URL.
        if src.startswith(('http://', 'https://')):
            new_url = reupload_image(src)['url']
        else:
            relpath = os.path.join(os.path.dirname(markdown_file), src)
            new_url = upload_image(os.path.abspath(relpath))['url']

        parts = ['<figure>\n', f' <img src="{new_url}"']
        if alt:
            parts.append(f' alt="{alt}"')
        parts.append('>\n')
        if caption:
            parts.append(f'<figcaption>{caption}</figcaption>')
        parts.append('</figure>')
        return ''.join(parts)

    return re.sub(pattern, replace_figure, html_string)
from wordpress_xmlrpc import Client, WordPressPost
from wordpress_xmlrpc.methods import posts, media
@click.command()
@click.argument(
    'markdown_files',
    nargs=-1,
    # exists=True rejects a mistyped path before anything is uploaded.
    type=click.Path(exists=True, file_okay=True, dir_okay=False),
)
def upload_hugo_posts_to_wordpress(markdown_files):
    """Upload Hugo Markdown posts to WordPress over XML-RPC.

    \f
    For every file: split off the ``---`` delimited YAML front matter,
    convert figure shortcodes (uploading their images), render the body with
    mistletoe, attach the optional ``thumbnail`` image, and create the post.

    Args:
        markdown_files: Paths of the Hugo Markdown files to upload.

    Raises:
        click.ClickException: If a file has no front matter, the front
            matter is not a mapping, or ``url``/``title``/``date`` is missing.
        KeyError: If WP_ENDPOINT, WP_USER or WP_PASSWORD is not set after
            loading the ``.env`` file.
    """
    dotenv.load_dotenv()

    # Configure your WordPress site URL and credentials, then initialize the
    # WordPress XML-RPC client
    wp = Client(
        os.environ['WP_ENDPOINT'],
        os.environ['WP_USER'],
        os.environ['WP_PASSWORD'],
    )

    for markdown_file in markdown_files:
        with open(markdown_file, 'r', encoding='utf-8') as file:
            content = file.read()

        # separate header from body
        try:
            _, frontmatter, body = content.split("---\n", maxsplit=2)
        except ValueError:
            raise click.ClickException(
                f"{markdown_file}: no '---' delimited YAML front matter found"
            ) from None

        fm = yaml.safe_load(frontmatter)
        print(fm)
        if not isinstance(fm, dict):
            raise click.ClickException(
                f"{markdown_file}: front matter is not a YAML mapping"
            )
        missing = [key for key in ('url', 'title', 'date') if key not in fm]
        if missing:
            # Checked before any image upload so a bad post leaves nothing
            # half-created on the server.
            raise click.ClickException(
                f"{markdown_file}: front matter is missing {', '.join(missing)}"
            )

        body = replace_figure_tags(markdown_file, body)

        # Create a WordPress post
        post = WordPressPost()
        post.slug = fm['url']
        post.title = fm['title']
        post.date = fm['date']
        post.content = mistletoe.markdown(body)

        # .get() also covers an empty "thumbnail:" key, which YAML loads as None.
        thumbnail = fm.get('thumbnail')
        if thumbnail:
            if thumbnail.startswith(('http://', 'https://')):
                post.thumbnail = reupload_image(thumbnail)['id']
            else:
                relpath = os.path.join(os.path.dirname(markdown_file), thumbnail)
                post.thumbnail = upload_image(os.path.abspath(relpath))['id']

        print(post.content)

        # Add additional fields like categories, tags, etc. as needed
        # NOTE(review): post_status is never set here, so the post presumably
        # gets the library/server default status - confirm whether 'publish'
        # is intended.
        wp.call(posts.NewPost(post))


if __name__ == '__main__':
    upload_hugo_posts_to_wordpress()

44
requirements.txt Normal file
View File

@ -0,0 +1,44 @@
asttokens @ file:///home/conda/feedstock_root/build_artifacts/asttokens_1698341106958/work
backcall @ file:///home/conda/feedstock_root/build_artifacts/backcall_1592338393461/work
backports.functools-lru-cache @ file:///home/conda/feedstock_root/build_artifacts/backports.functools_lru_cache_1687772187254/work
certifi==2023.7.22
charset-normalizer==3.3.1
click==8.1.7
comm @ file:///home/conda/feedstock_root/build_artifacts/comm_1691044910542/work
debugpy @ file:///croot/debugpy_1690905042057/work
decorator @ file:///home/conda/feedstock_root/build_artifacts/decorator_1641555617451/work
entrypoints @ file:///home/conda/feedstock_root/build_artifacts/entrypoints_1643888246732/work
exceptiongroup @ file:///home/conda/feedstock_root/build_artifacts/exceptiongroup_1692026125334/work
executing @ file:///home/conda/feedstock_root/build_artifacts/executing_1698579936712/work
idna==3.4
ipykernel @ file:///home/conda/feedstock_root/build_artifacts/ipykernel_1698244021190/work
ipython @ file:///home/conda/feedstock_root/build_artifacts/ipython_1696264049390/work
jedi @ file:///home/conda/feedstock_root/build_artifacts/jedi_1696326070614/work
jupyter-client @ file:///home/conda/feedstock_root/build_artifacts/jupyter_client_1654730843242/work
jupyter_core @ file:///home/conda/feedstock_root/build_artifacts/jupyter_core_1698673647019/work
matplotlib-inline @ file:///home/conda/feedstock_root/build_artifacts/matplotlib-inline_1660814786464/work
mistletoe==1.2.1
nest-asyncio @ file:///home/conda/feedstock_root/build_artifacts/nest-asyncio_1697083700168/work
packaging @ file:///home/conda/feedstock_root/build_artifacts/packaging_1696202382185/work
parso @ file:///home/conda/feedstock_root/build_artifacts/parso_1638334955874/work
pexpect @ file:///home/conda/feedstock_root/build_artifacts/pexpect_1667297516076/work
pickleshare @ file:///home/conda/feedstock_root/build_artifacts/pickleshare_1602536217715/work
platformdirs @ file:///home/conda/feedstock_root/build_artifacts/platformdirs_1696272223550/work
prompt-toolkit @ file:///home/conda/feedstock_root/build_artifacts/prompt-toolkit_1688565951714/work
psutil @ file:///opt/conda/conda-bld/psutil_1656431268089/work
ptyprocess @ file:///home/conda/feedstock_root/build_artifacts/ptyprocess_1609419310487/work/dist/ptyprocess-0.7.0-py2.py3-none-any.whl
pure-eval @ file:///home/conda/feedstock_root/build_artifacts/pure_eval_1642875951954/work
Pygments @ file:///home/conda/feedstock_root/build_artifacts/pygments_1691408637400/work
python-dateutil @ file:///home/conda/feedstock_root/build_artifacts/python-dateutil_1626286286081/work
python-dotenv==1.0.0
python-wordpress-xmlrpc==2.3
PyYAML==6.0.1
pyzmq @ file:///croot/pyzmq_1686601365461/work
requests==2.31.0
six @ file:///home/conda/feedstock_root/build_artifacts/six_1620240208055/work
stack-data @ file:///home/conda/feedstock_root/build_artifacts/stack_data_1669632077133/work
tornado @ file:///home/conda/feedstock_root/build_artifacts/tornado_1648827254365/work
traitlets @ file:///home/conda/feedstock_root/build_artifacts/traitlets_1698671135544/work
typing_extensions @ file:///home/conda/feedstock_root/build_artifacts/typing_extensions_1695040754690/work
urllib3==2.0.7
wcwidth @ file:///home/conda/feedstock_root/build_artifacts/wcwidth_1696255154857/work