mirror of
				https://gitlab.sectorq.eu/jaydee/omv_backup.git
				synced 2025-10-31 02:21:10 +01:00 
			
		
		
		
	added v3
This commit is contained in:
		| @@ -0,0 +1,8 @@ | ||||
| from __future__ import annotations | ||||
|  | ||||
| from .__main__ import cli_detect, query_yes_no | ||||
|  | ||||
# Names re-exported from .__main__ as the public API of this package.
__all__ = ("cli_detect", "query_yes_no")
| @@ -0,0 +1,321 @@ | ||||
| from __future__ import annotations | ||||
|  | ||||
| import argparse | ||||
| import sys | ||||
| from json import dumps | ||||
| from os.path import abspath, basename, dirname, join, realpath | ||||
| from platform import python_version | ||||
| from unicodedata import unidata_version | ||||
|  | ||||
| import charset_normalizer.md as md_module | ||||
| from charset_normalizer import from_fp | ||||
| from charset_normalizer.models import CliDetectionResult | ||||
| from charset_normalizer.version import __version__ | ||||
|  | ||||
|  | ||||
| def query_yes_no(question: str, default: str = "yes") -> bool: | ||||
|     """Ask a yes/no question via input() and return their answer. | ||||
|  | ||||
|     "question" is a string that is presented to the user. | ||||
|     "default" is the presumed answer if the user just hits <Enter>. | ||||
|         It must be "yes" (the default), "no" or None (meaning | ||||
|         an answer is required of the user). | ||||
|  | ||||
|     The "answer" return value is True for "yes" or False for "no". | ||||
|  | ||||
|     Credit goes to (c) https://stackoverflow.com/questions/3041986/apt-command-line-interface-like-yes-no-input | ||||
|     """ | ||||
|     valid = {"yes": True, "y": True, "ye": True, "no": False, "n": False} | ||||
|     if default is None: | ||||
|         prompt = " [y/n] " | ||||
|     elif default == "yes": | ||||
|         prompt = " [Y/n] " | ||||
|     elif default == "no": | ||||
|         prompt = " [y/N] " | ||||
|     else: | ||||
|         raise ValueError("invalid default answer: '%s'" % default) | ||||
|  | ||||
|     while True: | ||||
|         sys.stdout.write(question + prompt) | ||||
|         choice = input().lower() | ||||
|         if default is not None and choice == "": | ||||
|             return valid[default] | ||||
|         elif choice in valid: | ||||
|             return valid[choice] | ||||
|         else: | ||||
|             sys.stdout.write("Please respond with 'yes' or 'no' " "(or 'y' or 'n').\n") | ||||
|  | ||||
|  | ||||
def _build_cli_parser() -> argparse.ArgumentParser:
    """Create the ArgumentParser describing every option of the CLI."""
    parser = argparse.ArgumentParser(
        description="The Real First Universal Charset Detector. "
        "Discover originating encoding used on text file. "
        "Normalize text to unicode."
    )

    parser.add_argument(
        "files", type=argparse.FileType("rb"), nargs="+", help="File(s) to be analysed"
    )
    parser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        default=False,
        dest="verbose",
        help="Display complementary information about file if any. "
        "Stdout will contain logs about the detection process.",
    )
    parser.add_argument(
        "-a",
        "--with-alternative",
        action="store_true",
        default=False,
        dest="alternatives",
        help="Output complementary possibilities if any. Top-level JSON WILL be a list.",
    )
    parser.add_argument(
        "-n",
        "--normalize",
        action="store_true",
        default=False,
        dest="normalize",
        help="Permit to normalize input file. If not set, program does not write anything.",
    )
    parser.add_argument(
        "-m",
        "--minimal",
        action="store_true",
        default=False,
        dest="minimal",
        help="Only output the charset detected to STDOUT. Disabling JSON output.",
    )
    parser.add_argument(
        "-r",
        "--replace",
        action="store_true",
        default=False,
        dest="replace",
        help="Replace file when trying to normalize it instead of creating a new one.",
    )
    parser.add_argument(
        "-f",
        "--force",
        action="store_true",
        default=False,
        dest="force",
        help="Replace file without asking if you are sure, use this flag with caution.",
    )
    parser.add_argument(
        "-i",
        "--no-preemptive",
        action="store_true",
        default=False,
        dest="no_preemptive",
        help="Disable looking at a charset declaration to hint the detector.",
    )
    parser.add_argument(
        "-t",
        "--threshold",
        action="store",
        default=0.2,
        type=float,
        dest="threshold",
        help="Define a custom maximum amount of noise allowed in decoded content. 0. <= noise <= 1.",
    )
    parser.add_argument(
        "--version",
        action="version",
        version="Charset-Normalizer {} - Python {} - Unicode {} - SpeedUp {}".format(
            __version__,
            python_version(),
            unidata_version,
            "OFF" if md_module.__file__.lower().endswith(".py") else "ON",
        ),
        help="Show version information and exit.",
    )
    return parser


def _close_files(files: list) -> None:
    """Close every file object of *files* that is still open."""
    for handle in files:
        if not handle.closed:
            handle.close()


def _as_cli_result(path: str, match, is_preferred: bool):
    """Build the CliDetectionResult describing *match* for the file at *path*."""
    return CliDetectionResult(
        path,
        match.encoding,
        match.encoding_aliases,
        [cp for cp in match.could_be_from_charset if cp != match.encoding],
        match.language,
        match.alphabets,
        match.bom,
        match.percent_chaos,
        match.percent_coherence,
        None,
        is_preferred,
    )


def cli_detect(argv: list[str] | None = None) -> int:
    """
    CLI assistant using ARGV and ArgumentParser

    Runs the detection on every given file, optionally writes a normalized
    copy (or replaces the file), then prints the results on stdout either as
    JSON or, with --minimal, as one line of encoding names per file.

    :param argv: Arguments to parse; None lets argparse read sys.argv.
    :return: 0 if everything is fine, anything else equal trouble
        (1 for an invalid option combination or threshold, 2 when the
        normalized output could not be written).
    """
    args = _build_cli_parser().parse_args(argv)

    results = []

    # argparse already opened every input file; the finally clause makes sure
    # none of them stays open, whatever the exit path (error code or exception).
    try:
        if args.replace is True and args.normalize is False:
            print("Use --replace in addition of --normalize only.", file=sys.stderr)
            return 1

        if args.force is True and args.replace is False:
            print("Use --force in addition of --replace only.", file=sys.stderr)
            return 1

        if args.threshold < 0.0 or args.threshold > 1.0:
            print("--threshold VALUE should be between 0. AND 1.", file=sys.stderr)
            return 1

        for my_file in args.files:
            matches = from_fp(
                my_file,
                threshold=args.threshold,
                explain=args.verbose,
                preemptive_behaviour=args.no_preemptive is False,
            )

            best_guess = matches.best()
            source_path = abspath(my_file.name)

            if best_guess is None:
                print(
                    'Unable to identify originating encoding for "{}". {}'.format(
                        my_file.name,
                        (
                            "Maybe try increasing maximum amount of chaos."
                            if args.threshold < 1.0
                            else ""
                        ),
                    ),
                    file=sys.stderr,
                )
                results.append(
                    CliDetectionResult(
                        source_path,
                        None,
                        [],
                        [],
                        "Unknown",
                        [],
                        False,
                        1.0,
                        0.0,
                        None,
                        True,
                    )
                )
                my_file.close()
                continue

            # Keep a handle on THIS file's preferred result so that the
            # normalized path is recorded on it, not on the first result of
            # the whole run.
            current_result = _as_cli_result(source_path, best_guess, True)
            results.append(current_result)

            if len(matches) > 1 and args.alternatives:
                results.extend(
                    _as_cli_result(source_path, el, False)
                    for el in matches
                    if el != best_guess
                )

            # The input handle is not needed anymore. Releasing it before any
            # write matters in --replace mode, where the same path is reopened
            # for writing.
            my_file.close()

            if args.normalize is False:
                continue

            if best_guess.encoding.startswith("utf") is True:
                print(
                    '"{}" file does not need to be normalized, as it already came from unicode.'.format(
                        my_file.name
                    ),
                    file=sys.stderr,
                )
                continue

            resolved_path = realpath(my_file.name)
            name_parts: list[str] = basename(resolved_path).split(".")

            if args.replace is False:
                # Insert the encoding before the last dot-separated part,
                # e.g. "a.txt" -> "a.<encoding>.txt". For a name without any
                # dot, the encoding therefore ends up before the name.
                name_parts.insert(-1, best_guess.encoding)
            elif (
                args.force is False
                and query_yes_no(
                    'Are you sure to normalize "{}" by replacing it ?'.format(
                        my_file.name
                    ),
                    "no",
                )
                is False
            ):
                continue

            target_path = join(dirname(resolved_path), ".".join(name_parts))

            try:
                with open(target_path, "wb") as fp:
                    fp.write(best_guess.output())
            except OSError as e:
                print(str(e), file=sys.stderr)
                return 2

            current_result.unicode_path = target_path
    finally:
        _close_files(args.files)

    if args.minimal is False:
        print(
            dumps(
                (
                    [el.__dict__ for el in results]
                    if len(results) > 1
                    else results[0].__dict__
                ),
                ensure_ascii=True,
                indent=4,
            )
        )
    else:
        for my_file in args.files:
            print(
                ", ".join(
                    [
                        el.encoding or "undefined"
                        for el in results
                        if el.path == abspath(my_file.name)
                    ]
                )
            )

    return 0
|  | ||||
|  | ||||
if __name__ == "__main__":
    # Propagate cli_detect()'s return code as the process exit status so that
    # failures are visible to the calling shell when run as a script/module.
    sys.exit(cli_detect())
										
											Binary file not shown.
										
									
								
							
										
											Binary file not shown.
										
									
								
							
		Reference in New Issue
	
	Block a user