在控制 URL 参数时,总想找个轻松省事的方式,按某种规则来解析字符串。偏偏正则又是个坑爹的东西,又不想每次都去查手册,总想一劳永逸地解决问题。没办法,只能吃一次苦,把它记录下来,便于以后使用吧。
def extract_values(
    string, pattern, lowercase=False, whitespace=None,
    strip_values=False, delimiters=('{', '}')
):
    """Extract named values from ``string`` by matching it against ``pattern``.

    ``pattern`` is literal text with placeholders such as ``{name}``. Each
    placeholder becomes a greedy ``.+`` named group, every other piece of the
    pattern is matched literally, and the whole string must match.

    Args:
        string: Text to extract values from.
        pattern: Template containing placeholders wrapped in ``delimiters``.
        lowercase: If true, ``string`` is lower-cased before matching
            (``pattern`` is left untouched).
        whitespace: ``None`` leaves ``string`` as is. ``0`` removes every
            whitespace character. A positive integer ``N`` replaces each run
            of at least ``N`` whitespace characters with the first character
            of that run.
        strip_values: If true, surrounding whitespace is stripped from every
            extracted value.
        delimiters: Iterable of exactly two strings: the text that opens and
            the text that closes a placeholder. They are treated literally.

    Returns:
        A dict mapping placeholder names to the matched text, or ``{}`` when
        an option is invalid, the expanded pattern cannot be compiled, or
        ``string`` does not match.
    """
    if lowercase:
        string = string.lower()

    # Exactly two delimiters are required; anything else is an invalid option.
    try:
        opening, closing = tuple(delimiters)
    except (TypeError, ValueError):
        return {}

    if whitespace is not None:
        # int() raises ValueError for text such as 'abc' and TypeError for
        # unsupported types; both mean the option is invalid.
        try:
            whitespace = int(whitespace)
        except (TypeError, ValueError):
            return {}
        if whitespace < 0:
            return {}
        if whitespace == 0:
            string = re.sub(r'\s', '', string)
        else:
            string = re.sub(
                r'(\s)\s{%d,}' % (whitespace - 1), r'\1', string
            )

    # Escape the delimiters so characters like '[' or '(' are matched
    # literally instead of being interpreted as regex syntax.
    placeholder = re.escape(str(opening)) + r'(\w+)' + re.escape(str(closing))

    # With a single capture group, re.split alternates between literal text
    # (even indexes) and captured placeholder names (odd indexes).
    pieces = re.split(placeholder, pattern)
    expanded = [
        '(?P<%s>.+)' % piece if index % 2 else re.escape(piece)
        for index, piece in enumerate(pieces)
    ]
    expanded_pattern = '^%s$' % ''.join(expanded)

    try:
        match = re.match(expanded_pattern, string)
    except re.error:
        # E.g. a duplicated or otherwise invalid group name.
        return {}
    if match is None:
        return {}

    value_dict = match.groupdict()
    if strip_values:
        value_dict = {
            name: value.strip() for name, value in value_dict.items()
        }
    return value_dict
# Demo calls: each entry pairs the positional arguments with the keyword
# options of one extract_values() call; results are printed in order.
demo_calls = (
    (('380-250-80-j', '{width}-{height}-{quality}-{format}'), {}),
    (('/2012/08/12/test.html', '/{year}/{month}/{day}/{title}.html'), {}),
    (
        ('John Doe <john@example.com> (http://example.com)', '{name} <{email}> ({url})'),
        {},
    ),
    (
        ('from 4th October to 10th October', 'from `from` to `to`'),
        dict(strip_values=True, whitespace=1, delimiters=['`', '`']),
    ),
    (
        ('Convert 1500 Grams to Kilograms', 'convert {quantity} {from_unit} to {to_unit}'),
        dict(lowercase=True),
    ),
    (
        ('The time is 4:35pm here at Lima, Peru', 'The time is :time here at :city'),
        dict(delimiters=[':', '']),
    ),
)
for demo_args, demo_options in demo_calls:
    print(extract_values(*demo_args, **demo_options))
结果还是很香的啊
{'width': '380', 'height': '250', 'quality': '80', 'format': 'j'}
{'year': '2012', 'month': '08', 'day': '12', 'title': 'test'}
{'name': 'John Doe', 'email': 'john@example.com', 'url': 'http://example.com'}
{'from': '4th October', 'to': '10th October'}
{'quantity': '1500', 'from_unit': 'grams', 'to_unit': 'kilograms'}
{'time': '4:35pm', 'city': 'Lima, Peru'}
又重温了一遍 Python 的语法,凑合着用吧。
# coding=utf-8
# author=toohamster
import os
from PIL import Image
from resizeimage import resizeimage
import re
import mimetypes
# https://stackoverflow.com/questions/10607468/how-to-reduce-the-image-file-size-using-pil
def extract_values(string, pattern, lowercase=False, whitespace=None,
                   strip_values=False, delimiters=('{', '}')):
    """Extract named values from ``string`` by matching it against ``pattern``.

    ``pattern`` is literal text with placeholders such as ``{name}``. Each
    placeholder becomes a greedy ``.+`` named group, every other piece of the
    pattern is matched literally, and the whole string must match.

    Args:
        string: Text to extract values from.
        pattern: Template containing placeholders wrapped in ``delimiters``.
        lowercase: If true, ``string`` is lower-cased before matching
            (``pattern`` is left untouched).
        whitespace: ``None`` leaves ``string`` as is. ``0`` removes every
            whitespace character. A positive integer ``N`` replaces each run
            of at least ``N`` whitespace characters with the first character
            of that run.
        strip_values: If true, surrounding whitespace is stripped from every
            extracted value.
        delimiters: Iterable of exactly two strings: the text that opens and
            the text that closes a placeholder. They are treated literally.

    Returns:
        A dict mapping placeholder names to the matched text, or ``{}`` when
        an option is invalid, the expanded pattern cannot be compiled, or
        ``string`` does not match.
    """
    if lowercase:
        string = string.lower()

    # Exactly two delimiters are required; anything else is an invalid option.
    try:
        opening, closing = tuple(delimiters)
    except (TypeError, ValueError):
        return {}

    if whitespace is not None:
        # int() raises ValueError for text such as 'abc' and TypeError for
        # unsupported types; both mean the option is invalid.
        try:
            whitespace = int(whitespace)
        except (TypeError, ValueError):
            return {}
        if whitespace < 0:
            return {}
        if whitespace == 0:
            string = re.sub(r'\s', '', string)
        else:
            string = re.sub(
                r'(\s)\s{%d,}' % (whitespace - 1), r'\1', string
            )

    # Escape the delimiters so characters like '[' or '(' are matched
    # literally instead of being interpreted as regex syntax.
    placeholder = re.escape(str(opening)) + r'(\w+)' + re.escape(str(closing))

    # With a single capture group, re.split alternates between literal text
    # (even indexes) and captured placeholder names (odd indexes).
    pieces = re.split(placeholder, pattern)
    expanded = [
        '(?P<%s>.+)' % piece if index % 2 else re.escape(piece)
        for index, piece in enumerate(pieces)
    ]
    expanded_pattern = '^%s$' % ''.join(expanded)

    try:
        match = re.match(expanded_pattern, string)
    except re.error:
        # E.g. a duplicated or otherwise invalid group name.
        return {}
    if match is None:
        return {}

    value_dict = match.groupdict()
    if strip_values:
        value_dict = {
            name: value.strip() for name, value in value_dict.items()
        }
    return value_dict
def parseImageMetas(meta_str, extension):
    """Parse a resize directive such as ``s-725x360x80xj`` into a dict.

    The directive is matched against
    ``s-{width}x{height}x{quality}x{format}``.

    Args:
        meta_str: The directive text (the filename part before the first
            ``_``, as passed by ``parseImageUrl``).
        extension: Format name used when the format code is ``o`` or is not
            one of ``j`` / ``p`` / ``w``.

    Returns:
        ``None`` if the directive does not match or if width, height or
        quality is not a positive integer. Otherwise a dict with integer
        ``width``, ``height`` and ``quality`` (quality capped at 90), the
        resolved ``format`` name and the guessed ``mime`` type (which may be
        ``None``).
    """
    o = extract_values(meta_str, 's-{width}x{height}x{quality}x{format}')

    for key in ('width', 'height', 'quality'):
        raw = o.get(key)
        # isdecimal() only accepts characters int() can parse; isdigit()
        # also accepts e.g. superscript digits, which make int() raise.
        if raw is None or not raw.isdecimal() or int(raw) < 1:
            return None
        o[key] = int(raw)

    # Quality is capped at 90.
    o['quality'] = min(o['quality'], 90)

    formats = {
        'j': "JPEG",
        'p': "PNG",
        'w': "WEBP",
        'o': extension,
    }
    o['format'] = formats.get(o['format'], extension)

    # Guard against a missing/empty format so the concatenation cannot fail.
    mime = None
    if o['format']:
        mime = mimetypes.guess_type("example." + o['format'])[0]
    # guess_type may not resolve ".webp", depending on the Python version
    # and the system MIME tables, so fall back to the well-known type.
    if mime is None and o['format'] == "WEBP":
        mime = 'image/webp'
    o['mime'] = mime
    return o
def parseImageUrl(url, real_img_format=False):
    """Extract the resize directive from an image URL/path and parse it.

    The query string (everything from the first ``?``) is ignored. The last
    ``/``-separated segment must look like ``<directive>_<original name>``.

    Args:
        url: URL or file path of the resized image.
        real_img_format: Format name of the real image. When falsy
            (``False``, ``None``, ``''``), the upper-cased file extension of
            ``url`` is used instead, with ``JPG`` mapped to ``JPEG``.

    Returns:
        The dict produced by ``parseImageMetas``, or ``None`` when the
        filename contains no ``_`` or the directive is invalid.
    """
    path = url.split('?')[0]
    filename = path.split("/")[-1]

    meta_str, underscore, _ = filename.partition('_')
    if not underscore:
        return None

    # `not` (rather than `== False`) also covers None, so a missing real
    # format falls back to the URL extension instead of leaking through.
    if not real_img_format:
        extension = os.path.splitext(path)[1][1:].strip().upper()
        if extension == 'JPG':
            extension = 'JPEG'
    else:
        extension = real_img_format

    return parseImageMetas(meta_str, extension)
def resizeImage(new_file):
    """Create the resized image described by a path's ``s-...`` directive.

    The last path segment must look like
    ``s-<width>x<height>x<quality>x<format>_<original name>``. The original
    image is read from ``<same directory>/<original name>``, resized with
    ``resizeimage.resize_cover`` and saved to ``new_file``. Progress and
    error messages are printed.

    Args:
        new_file: Target path of the resized image; anything from the first
            ``?`` on is ignored when locating the original file.

    Returns:
        Always ``None``.
    """
    # NOTE(review): the image is saved to the path exactly as given,
    # including any '?...' suffix -- confirm this is intended.
    img_path = new_file
    new_file = new_file.split('?')[0]

    # rpartition keeps a bare filename relative; joining with '/'
    # unconditionally would turn it into a path under the filesystem root.
    directory, slash, new_filename = new_file.rpartition('/')
    tmp = new_filename.split('_', 1)
    if len(tmp) != 2:
        print("path error")
        return None
    origin_file = directory + slash + tmp[1]

    # Only the "s-" directive is handled.
    if not tmp[0].startswith('s-'):
        print("不支持此指令!")
        return None

    # The context manager closes the source file once the result is saved.
    with Image.open(origin_file) as im:
        print("原图: " + origin_file + " sizeof " + str(os.path.getsize(origin_file)))
        print(im.format, im.mode, im.size)

        img_metas = parseImageUrl(img_path, im.format)
        if img_metas is None:
            print("param format error!")
            return None

        # NOTE(review): the third positional argument (False) presumably
        # disables the library's size validation -- confirm against
        # python-resize-image.
        img = resizeimage.resize_cover(
            im, [img_metas['width'], img_metas['height']], False
        )
        if img_metas['format'] == 'JPEG':
            # convert() returns a new image; keep it, otherwise the
            # conversion to RGB is silently lost.
            img = img.convert("RGB")

        # Original author's note: for JPEG, quality controls the output size
        # and optimize has no effect; for PNG, optimize controls the size and
        # quality has no effect.
        img.save(
            img_path, img_metas['format'],
            subsampling=0, quality=img_metas['quality'], optimize=True
        )

    print(img_path + " sizeof " + str(os.path.getsize(img_path)))
# // image/png | image/jpeg | image/gif | image/webp
# Demo: produce the same crop at three JPEG quality levels (80, 75, 70).
for demo_path in (
    "/Users/toohamster/test/s-725x360x80xj_dcdf_55537.jpg",
    "/Users/toohamster/test/s-725x360x75xj_dcdf_55537.jpg",
    "/Users/toohamster/test/s-725x360x70xj_dcdf_55537.jpg",
):
    resizeImage(demo_path)
安装依赖库:
pip3.7 install Pillow==8.0.1
pip3.7 install python-resize-image==1.1.19
pip3.7 install piexif==1.1.3
打印结果:
/usr/local/bin/python3 /Users/toohamster/workspace/test/demo.py
原图: /Users/toohamster/test/dcdf_55537.jpg sizeof 133921
JPEG RGB (700, 350)
/Users/toohamster/test/s-725x360x80xj_dcdf_55537.jpg sizeof 75246
原图: /Users/toohamster/test/dcdf_55537.jpg sizeof 133921
JPEG RGB (700, 350)
/Users/toohamster/test/s-725x360x75xj_dcdf_55537.jpg sizeof 65268
原图: /Users/toohamster/test/dcdf_55537.jpg sizeof 133921
JPEG RGB (700, 350)
/Users/toohamster/test/s-725x360x70xj_dcdf_55537.jpg sizeof 59009
Process finished with exit code 0
网友评论