summaryrefslogtreecommitdiff
path: root/libre/iceweasel/process-json-files.py
diff options
context:
space:
mode:
authorgrizzlyuser <grizzlyuser@protonmail.com>2021-04-04 23:16:26 +0300
committerbill-auger <mr.j.spam.me@gmail.com>2021-04-08 11:12:58 -0400
commit9f45f935ac5c57381ee9580d84f40189c2ba8994 (patch)
tree829fff98be1ec701727568b3778ba18787864d24 /libre/iceweasel/process-json-files.py
parentc52b1e0d7644aff6e00dec8b4433d5aad9e22dd6 (diff)
libre/iceweasel: JSON processing script updates
Refactor and add processing of the top-sites.json Remote Settings dump, which is needed to override the default sites with nonfree content that are displayed on the new tab page and in the address bar dropdown.
Diffstat (limited to 'libre/iceweasel/process-json-files.py')
-rw-r--r--libre/iceweasel/process-json-files.py207
1 files changed, 132 insertions, 75 deletions
diff --git a/libre/iceweasel/process-json-files.py b/libre/iceweasel/process-json-files.py
index a972e90e2..69264dc94 100644
--- a/libre/iceweasel/process-json-files.py
+++ b/libre/iceweasel/process-json-files.py
@@ -1,6 +1,6 @@
#! /usr/bin/python3
-# Copyright (C) 2020 grizzlyuser <grizzlyuser@protonmail.com>
+# Copyright (C) 2020, 2021 grizzlyuser <grizzlyuser@protonmail.com>
# Based on: https://gitlab.trisquel.org/trisquel/wrapage-helpers/-/blob/81881d89b2bf7d502dd14fcccdb471fec6f6b206/helpers/DATA/firefox/reprocess-search-config.py
# Below is the notice from the original author:
#
@@ -42,6 +42,7 @@ parser.add_argument(
'-i',
'--indent',
type=int,
+ default=2,
help='indent for pretty printing of output files')
arguments = parser.parse_args()
@@ -49,103 +50,127 @@ File = namedtuple('File', ['path', 'content'])
class RemoteSettings:
- DUMPS_PATH = arguments.MAIN_PATH / 'services/settings/dumps'
- JSON_PATHS = tuple(DUMPS_PATH.glob('*/*.json'))
- WRAPPER_NAME = 'data'
+ DUMPS_PATH_RELATIVE = 'services/settings/dumps'
+ DUMPS_PATH_ABSOLUTE = arguments.MAIN_PATH / DUMPS_PATH_RELATIVE
+
+ _WRAPPER_NAME = 'data'
@classmethod
def wrap(cls, processed):
- return File(processed.path, {cls.WRAPPER_NAME: processed.content})
+ return File(processed.path, {cls._WRAPPER_NAME: processed.content})
@classmethod
def unwrap(cls, parsed_jsons):
- return [File(json.path, json.content[cls.WRAPPER_NAME])
+ return [File(json.path, json.content[cls._WRAPPER_NAME])
for json in parsed_jsons]
@classmethod
- def process_raw(cls, unwrapped_jsons):
- changes = []
- output_path = cls.DUMPS_PATH / 'monitor/changes.json'
+ def should_modify_collection(cls, collection):
+ return True
+ @classmethod
+ def process_raw(cls, unwrapped_jsons, parsed_schema):
+ timestamps, result = [], []
for collection in unwrapped_jsons:
- if collection.path == cls.DUMPS_PATH / 'main/example.json':
- continue
- latest_change = {}
- latest_change['last_modified'] = max(
- (record['last_modified'] for record in collection.content), default=0)
- latest_change['bucket'] = collection.path.parent.name
- latest_change['collection'] = collection.path.stem
- changes.append(latest_change)
+ should_modify_collection = cls.should_modify_collection(collection)
+ for record in collection.content:
+ if should_modify_collection:
+ if cls.should_drop_record(record):
+ continue
- output_path.parent.mkdir(exist_ok=True)
+ clone = copy.deepcopy(record)
- return File(output_path, changes)
+ record = cls.process_record(record)
- @classmethod
- def process(cls, parsed_jsons):
- return cls.wrap(cls.process_raw(cls.unwrap(parsed_jsons)))
+ if clone != record:
+ timestamp = int(round(time.time_ns() / 10 ** 6))
+ while timestamp in timestamps:
+ timestamp += 1
+ timestamps.append(timestamp)
+ record['last_modified'] = timestamp
+ if parsed_schema is not None:
+ validate(record, schema=parsed_schema)
-class SearchConfig(RemoteSettings):
- JSON_PATHS = (RemoteSettings.DUMPS_PATH / 'main/search-config.json',)
+ result.append(record)
+
+ cls.OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)
- def _get_schema():
- PATH = arguments.MAIN_PATH / \
- 'toolkit/components/search/schema/search-engine-config-schema.json'
- with PATH.open() as file:
- return json.load(file)
+ return File(cls.OUTPUT_PATH, result)
@classmethod
- def process_raw(cls, unwrapped_jsons):
- _WHITELIST = ('ddg@search.mozilla.org', 'wikipedia@search.mozilla.org')
- SCHEMA = cls._get_schema()
+ def process(cls, parsed_jsons, parsed_schema):
+ return cls.wrap(
+ cls.process_raw(
+ cls.unwrap(parsed_jsons),
+ parsed_schema))
- search_engines, timestamps = [], []
- search_config = unwrapped_jsons[0]
- for search_engine in search_config.content:
- if search_engine['webExtension']['id'] in _WHITELIST:
- clone = copy.deepcopy(search_engine)
+class Changes(RemoteSettings):
+ JSON_PATHS = tuple(RemoteSettings.DUMPS_PATH_ABSOLUTE.glob('*/*.json'))
+ OUTPUT_PATH = RemoteSettings.DUMPS_PATH_ABSOLUTE / 'monitor/changes.json'
- if 'telemetryId' in search_engine:
- del search_engine['telemetryId']
- if 'extraParams' in search_engine:
- del search_engine['extraParams']
+ @classmethod
+ def process_raw(cls, unwrapped_jsons, parsed_schema):
+ changes = []
+
+ for collection in unwrapped_jsons:
+ if collection.path != RemoteSettings.DUMPS_PATH_ABSOLUTE / 'main/example.json':
+ latest_change = {}
+ latest_change['last_modified'] = max(
+ (record['last_modified'] for record in collection.content), default=0)
+ latest_change['bucket'] = collection.path.parent.name
+ latest_change['collection'] = collection.path.stem
+ changes.append(latest_change)
+
+ cls.OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)
+
+ return File(cls.OUTPUT_PATH, changes)
+
+
+class SearchConfig(RemoteSettings):
+ JSON_PATHS = (
+ RemoteSettings.DUMPS_PATH_ABSOLUTE /
+ 'main/search-config.json',
+ )
+ SCHEMA_PATH = arguments.MAIN_PATH / \
+ 'toolkit/components/search/schema/search-engine-config-schema.json'
+ OUTPUT_PATH = JSON_PATHS[0]
- general_specifier = {}
- for specifier in search_engine['appliesTo'].copy():
- if 'application' in specifier:
- if 'distributions' in specifier['application']:
- search_engine['appliesTo'].remove(specifier)
- continue
- if 'extraParams' in specifier['application']:
- del specifier['application']['extraParams']
+ _DUCKDUCKGO_SEARCH_ENGINE_ID = 'ddg@search.mozilla.org'
- if 'included' in specifier and 'everywhere' in specifier[
- 'included'] and specifier['included']['everywhere']:
- general_specifier = specifier
+ @classmethod
+ def should_drop_record(cls, search_engine):
+ return search_engine['webExtension']['id'] not in (
+ cls._DUCKDUCKGO_SEARCH_ENGINE_ID, 'wikipedia@search.mozilla.org')
- if not general_specifier:
- general_specifier = {'included': {'everywhere': True}}
- search_engine['appliesTo'].insert(0, general_specifier)
- if search_engine['webExtension']['id'] == _WHITELIST[0]:
- general_specifier['default'] = 'yes'
+ @classmethod
+ def process_record(cls, search_engine):
+ [search_engine.pop(key, None)
+ for key in ['extraParams', 'telemetryId']]
- if clone != search_engine:
- timestamp = int(round(time.time_ns() / 10 ** 6))
- while timestamp in timestamps:
- timestamp += 1
- timestamps.append(timestamp)
- search_engine['last_modified'] = timestamp
+ general_specifier = {}
+ for specifier in search_engine['appliesTo'].copy():
+ if 'application' in specifier:
+ if 'distributions' in specifier['application']:
+ search_engine['appliesTo'].remove(specifier)
+ continue
+ specifier['application'].pop('extraParams', None)
- validate(search_engine, schema=SCHEMA)
+ if 'included' in specifier and 'everywhere' in specifier[
+ 'included'] and specifier['included']['everywhere']:
+ general_specifier = specifier
- search_engines.append(search_engine)
+ if not general_specifier:
+ general_specifier = {'included': {'everywhere': True}}
+ search_engine['appliesTo'].insert(0, general_specifier)
+ if search_engine['webExtension']['id'] == cls._DUCKDUCKGO_SEARCH_ENGINE_ID:
+ general_specifier['default'] = 'yes'
- return File(search_config.path, search_engines)
+ return search_engine
-class TopSites:
+class TippyTopSites:
JSON_PATHS = (
arguments.MAIN_PATH /
'browser/components/newtab/data/content/tippytop/top_sites.json',
@@ -153,15 +178,42 @@ class TopSites:
'tippytop/top_sites.json')
@classmethod
- def process(cls, parsed_jsons):
- main_top_sites = parsed_jsons[0]
- branding_top_sites = parsed_jsons[1]
- result = branding_top_sites.content + \
- [site for site in main_top_sites.content if 'wikipedia.org' in site['domains']]
- return File(main_top_sites.path, result)
+ def process(cls, parsed_jsons, parsed_schema):
+ tippy_top_sites_main = parsed_jsons[0]
+ tippy_top_sites_branding = parsed_jsons[1]
+ result = tippy_top_sites_branding.content + \
+ [site for site in tippy_top_sites_main.content if 'wikipedia.org' in site['domains']]
+ return File(tippy_top_sites_main.path, result)
+
+class TopSites(RemoteSettings):
+ _TOP_SITES_JSON_PATH = 'main/top-sites.json'
+ _TOP_SITES_PATH_MAIN = RemoteSettings.DUMPS_PATH_ABSOLUTE / _TOP_SITES_JSON_PATH
-processors = (SearchConfig, TopSites, RemoteSettings)
+ JSON_PATHS = (
+ arguments.BRANDING_PATH /
+ RemoteSettings.DUMPS_PATH_RELATIVE /
+ _TOP_SITES_JSON_PATH,
+ _TOP_SITES_PATH_MAIN)
+ OUTPUT_PATH = _TOP_SITES_PATH_MAIN
+
+ @classmethod
+ def should_modify_collection(cls, collection):
+ return cls._TOP_SITES_PATH_MAIN == collection.path
+
+ @classmethod
+ def should_drop_record(cls, site):
+ return site['url'] != 'https://www.wikipedia.org/'
+
+ @classmethod
+ def process_record(cls, site):
+ site.pop('exclude_regions', None)
+ return site
+
+
+# To reflect the latest timestamps, Changes class should always come after
+# all other RemoteSettings subclasses
+processors = (TippyTopSites, SearchConfig, TopSites, Changes)
for processor in processors:
parsed_jsons = []
@@ -169,6 +221,11 @@ for processor in processors:
with json_path.open() as file:
parsed_jsons.append(File(json_path, json.load(file)))
- processed = processor.process(parsed_jsons)
+ parsed_schema = None
+ if hasattr(processor, "SCHEMA_PATH"):
+ with processor.SCHEMA_PATH.open() as file:
+ parsed_schema = json.load(file)
+
+ processed = processor.process(parsed_jsons, parsed_schema)
with processed.path.open('w') as file:
json.dump(processed.content, file, indent=arguments.indent)