From e47b2f10d5e13a6ab86f45ee4d3ee8187ec00b42 Mon Sep 17 00:00:00 2001
From: James Ravenscroft
Date: Tue, 31 Oct 2023 07:15:13 +0000
Subject: [PATCH] initial commit

---
 convert.py       | 226 +++++++++++++++++++++++++++++++++++++++++++++++
 requirements.txt |  44 +++++++++++
 2 files changed, 270 insertions(+)
 create mode 100644 convert.py
 create mode 100644 requirements.txt

diff --git a/convert.py b/convert.py
new file mode 100644
index 0000000..972ef34
--- /dev/null
+++ b/convert.py
@@ -0,0 +1,226 @@
+"""Upload Hugo Markdown posts, and the images they reference, to WordPress.
+
+Each Markdown file is split into YAML front matter and a body. Hugo ``figure``
+shortcodes in the body are rewritten to HTML ``<figure>`` elements that point
+at copies of the images uploaded to WordPress, the body is rendered to HTML
+with mistletoe, and the result is submitted as a new post over XML-RPC.
+
+Connection settings come from the environment (optionally via a ``.env``
+file): ``WP_ENDPOINT``, ``WP_USER`` and ``WP_PASSWORD``.
+"""
+
+import yaml
+import click
+import os
+import dotenv
+import mistletoe
+import re
+import requests
+import tempfile
+import mimetypes
+import base64
+
+from xmlrpc.client import Binary
+from urllib.parse import urlparse
+
+from wordpress_xmlrpc import Client, WordPressPost
+from wordpress_xmlrpc.methods import posts, media
+
+# Upload results from this run, keyed by local path or source URL, so an
+# image used more than once is only uploaded once.
+image_cache = {}
+
+# Seconds to wait on a remote image download before giving up.
+DOWNLOAD_TIMEOUT = 30
+
+# A Hugo ``figure`` shortcode once ``{{<`` / ``>}}`` have been reduced to
+# ``<`` / ``>``. Captures src, then optional alt and caption.
+# NOTE(review): reconstructed - confirm the attribute order against real posts.
+FIGURE_PATTERN = re.compile(
+    r'<\s*figure\s+src="([^"]*)"'
+    r'(?:\s+alt="([^"]*)")?'
+    r'(?:\s+caption="([^"]*)")?'
+    r'\s*>'
+)
+
+
+def _wordpress_client():
+    """Return an XML-RPC client built from the ``WP_*`` environment variables.
+
+    Raises:
+        KeyError: if WP_ENDPOINT, WP_USER or WP_PASSWORD is not set.
+    """
+    return Client(
+        os.environ['WP_ENDPOINT'],
+        os.environ['WP_USER'],
+        os.environ['WP_PASSWORD'],
+    )
+
+
+def _upload_media(cache_key, name, mime, data):
+    """Upload *data* as a media item and cache the response under *cache_key*."""
+    wp = _wordpress_client()
+    image_cache[cache_key] = wp.call(media.UploadFile({
+        "name": name,
+        "type": mime,
+        "bits": Binary(data),
+    }))
+    return image_cache[cache_key]
+
+
+def _upload_referenced_image(markdown_file, src):
+    """Upload an image referenced from *markdown_file* and return the response.
+
+    *src* is treated as a URL when it starts with ``http``; otherwise it is
+    resolved relative to the directory containing *markdown_file*.
+    """
+    if src.startswith('http'):
+        return reupload_image(src)
+
+    relpath = os.path.join(os.path.dirname(markdown_file), src)
+    return upload_image(os.path.abspath(relpath))
+
+
+def upload_image(image_path):
+    """Upload a local image file to WordPress.
+
+    Args:
+        image_path: path of the file to upload.
+
+    Returns:
+        The response of the ``UploadFile`` call; this module reads its
+        ``'id'`` and ``'url'`` entries. A repeated call with the same path
+        returns the cached response without uploading again.
+    """
+    if image_path in image_cache:
+        return image_cache[image_path]
+
+    mime = mimetypes.guess_type(image_path)[0]
+    basename = os.path.basename(image_path)
+
+    print("url", image_path)
+    print("basename", basename)
+    print("mimetype", mime)
+
+    # Read the file before connecting so that a missing file fails fast.
+    with open(image_path, 'rb') as f:
+        data = f.read()
+
+    return _upload_media(image_path, basename, mime, data)
+
+
+def reupload_image(image_url):
+    """Download a remote image and upload it to WordPress.
+
+    A copy of the download is also written to the current working directory
+    under the image's base name.
+
+    Args:
+        image_url: http(s) URL of the image.
+
+    Returns:
+        The response of the ``UploadFile`` call, as for ``upload_image``. A
+        repeated call with the same URL returns the cached response.
+
+    Raises:
+        requests.RequestException: if the download fails, times out, or the
+            server answers with an error status.
+    """
+    if image_url in image_cache:
+        return image_cache[image_url]
+
+    r = requests.get(image_url, timeout=DOWNLOAD_TIMEOUT)
+    # Otherwise an HTTP error page would be uploaded in place of the image.
+    r.raise_for_status()
+
+    url = urlparse(image_url)
+    basename = os.path.basename(url.path)
+    # Guess from the path alone so a query string cannot hide the extension.
+    mime = mimetypes.guess_type(url.path)[0]
+
+    print("url", url)
+    print("basename", basename)
+    print("mimetype", mime)
+
+    thumb = _upload_media(image_url, basename, mime, r.content)
+
+    with open(basename, 'wb') as f:
+        print("write content")
+        f.write(r.content)
+
+    return thumb
+
+
+def replace_figure_tags(markdown_file, html_string):
+    """Rewrite Hugo ``figure`` shortcodes as HTML figures with uploaded images.
+
+    Args:
+        markdown_file: path of the post; relative image paths are resolved
+            against its directory.
+        html_string: the post body.
+
+    Returns:
+        The body with every matched shortcode replaced. The ``{{<`` / ``>}}``
+        delimiters of all shortcodes, not only figures, are reduced to
+        ``<`` / ``>``.
+    """
+    html_string = html_string.replace("{{<", "<").replace(">}}", ">")
+
+    def replace_figure(match):
+        src, alt, caption = match.groups()
+        replacement = '<figure>\n'
+        if src:
+            new_url = _upload_referenced_image(markdown_file, src)['url']
+            # alt and caption are None when the attribute is absent; emit an
+            # empty string instead of the text "None".
+            alt = alt or ''
+            caption = caption or ''
+            replacement += f'  <img src="{new_url}" alt="{alt}">\n'
+            replacement += f'  <figcaption>{caption}</figcaption>\n'
+        replacement += '</figure>'
+        return replacement
+
+    return FIGURE_PATTERN.sub(replace_figure, html_string)
+
+
+@click.command()
+@click.argument('markdown_files', nargs=-1, type=click.Path(file_okay=True, dir_okay=False))
+def upload_hugo_posts_to_wordpress(markdown_files):
+    """Create a WordPress post from each Hugo Markdown file in MARKDOWN_FILES."""
+    dotenv.load_dotenv()
+    wp = _wordpress_client()
+
+    for markdown_file in markdown_files:
+        with open(markdown_file, 'r', encoding='utf-8') as file:
+            content = file.read()
+
+        # The front matter sits between the first two "---" lines.
+        parts = content.split("---\n", maxsplit=2)
+        if len(parts) != 3:
+            raise click.ClickException(
+                f"{markdown_file}: expected YAML front matter between '---' lines"
+            )
+        _, frontmatter, body = parts
+
+        fm = yaml.safe_load(frontmatter)
+        print(fm)
+
+        body = replace_figure_tags(markdown_file, body)
+
+        post = WordPressPost()
+        post.slug = fm['url']
+        post.title = fm['title']
+        post.date = fm['date']
+        post.content = mistletoe.markdown(body)
+
+        if 'thumbnail' in fm:
+            media_item = _upload_referenced_image(markdown_file, fm['thumbnail'])
+            post.thumbnail = media_item['id']
+
+        print(post.content)
+
+        wp.call(posts.NewPost(post))
+
+
+if __name__ == '__main__':
+    upload_hugo_posts_to_wordpress()
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..612dc0b
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,44 @@
+asttokens @ file:///home/conda/feedstock_root/build_artifacts/asttokens_1698341106958/work
+backcall @ file:///home/conda/feedstock_root/build_artifacts/backcall_1592338393461/work
+backports.functools-lru-cache @ file:///home/conda/feedstock_root/build_artifacts/backports.functools_lru_cache_1687772187254/work
+certifi==2023.7.22
+charset-normalizer==3.3.1
+click==8.1.7
+comm @ file:///home/conda/feedstock_root/build_artifacts/comm_1691044910542/work
+debugpy @ file:///croot/debugpy_1690905042057/work
+decorator @ file:///home/conda/feedstock_root/build_artifacts/decorator_1641555617451/work
+entrypoints @ file:///home/conda/feedstock_root/build_artifacts/entrypoints_1643888246732/work
+exceptiongroup @ file:///home/conda/feedstock_root/build_artifacts/exceptiongroup_1692026125334/work
+executing @ file:///home/conda/feedstock_root/build_artifacts/executing_1698579936712/work
+idna==3.4
+ipykernel @ file:///home/conda/feedstock_root/build_artifacts/ipykernel_1698244021190/work
+ipython @ file:///home/conda/feedstock_root/build_artifacts/ipython_1696264049390/work
+jedi @ file:///home/conda/feedstock_root/build_artifacts/jedi_1696326070614/work
+jupyter-client @ file:///home/conda/feedstock_root/build_artifacts/jupyter_client_1654730843242/work
+jupyter_core @ file:///home/conda/feedstock_root/build_artifacts/jupyter_core_1698673647019/work
+matplotlib-inline @ file:///home/conda/feedstock_root/build_artifacts/matplotlib-inline_1660814786464/work
+mistletoe==1.2.1
+nest-asyncio @ file:///home/conda/feedstock_root/build_artifacts/nest-asyncio_1697083700168/work
+packaging @ file:///home/conda/feedstock_root/build_artifacts/packaging_1696202382185/work
+parso @ file:///home/conda/feedstock_root/build_artifacts/parso_1638334955874/work
+pexpect @ file:///home/conda/feedstock_root/build_artifacts/pexpect_1667297516076/work
+pickleshare @ file:///home/conda/feedstock_root/build_artifacts/pickleshare_1602536217715/work
+platformdirs @ file:///home/conda/feedstock_root/build_artifacts/platformdirs_1696272223550/work
+prompt-toolkit @ file:///home/conda/feedstock_root/build_artifacts/prompt-toolkit_1688565951714/work
+psutil @ file:///opt/conda/conda-bld/psutil_1656431268089/work
+ptyprocess @ file:///home/conda/feedstock_root/build_artifacts/ptyprocess_1609419310487/work/dist/ptyprocess-0.7.0-py2.py3-none-any.whl
+pure-eval @ file:///home/conda/feedstock_root/build_artifacts/pure_eval_1642875951954/work
+Pygments @ file:///home/conda/feedstock_root/build_artifacts/pygments_1691408637400/work
+python-dateutil @ file:///home/conda/feedstock_root/build_artifacts/python-dateutil_1626286286081/work
+python-dotenv==1.0.0
+python-wordpress-xmlrpc==2.3
+PyYAML==6.0.1
+pyzmq @ file:///croot/pyzmq_1686601365461/work
+requests==2.31.0
+six @ file:///home/conda/feedstock_root/build_artifacts/six_1620240208055/work
+stack-data @ file:///home/conda/feedstock_root/build_artifacts/stack_data_1669632077133/work
+tornado @ file:///home/conda/feedstock_root/build_artifacts/tornado_1648827254365/work
+traitlets @ file:///home/conda/feedstock_root/build_artifacts/traitlets_1698671135544/work
+typing_extensions @ file:///home/conda/feedstock_root/build_artifacts/typing_extensions_1695040754690/work
+urllib3==2.0.7
+wcwidth @ file:///home/conda/feedstock_root/build_artifacts/wcwidth_1696255154857/work