Очень часто приходится слышать, что Sqoop — это серебряная пуля для загрузки данных большого объёма с реляционных БД в Hadoop, особенно с Oracle, и Spark-ом невозможно достигнуть такой производительности. При этом приводят аргументы, что sqoop — это инструмент, заточенный под загрузку, а Spark предназначен для обработки данных.
Меня зовут Максим Петров, я руководитель департамента "Чаптер инженеров данных и разработчиков", и я решил написать инструкцию о том, как правильно и быстро загружать данные Spark, основываясь на принципах загрузки Sqoop.
Первичное сравнение технологий
В нашем примере будем рассматривать загрузку данных из таблиц OracleDB.
Рассмотрим случай, когда нам необходимо полностью перегрузить таблицу/партицию на кластер Hadoop c созданием метаданных hive.
Осторожно, много кода
-- DWH.SERVICE_FEATURE definition CREATE TABLE "DWH"."SERVICE_FEATURE" ( "BAN" NUMBER(9,0) NOT NULL ENABLE, "SUBSCRIBER_NO" CHAR(11) NOT NULL ENABLE, "SOC" CHAR(9) NOT NULL ENABLE, "SOC_SEQ_NO" NUMBER(9,0) NOT NULL ENABLE, "SERVICE_FTR_SEQ_NO" NUMBER(9,0) NOT NULL ENABLE, "SYS_CREATION_DATE" DATE NOT NULL ENABLE, "SYS_UPDATE_DATE" DATE, "OPERATOR_ID" NUMBER(9,0), "APPLICATION_ID" CHAR(6), "DL_SERVICE_CODE" CHAR(5), "DL_UPDATE_STAMP" NUMBER(4,0), "SOC_EFFECTIVE_DATE" DATE NOT NULL ENABLE, "CUSTOMER_ID" NUMBER(9,0) NOT NULL ENABLE, "FEATURE_CODE" CHAR(6) NOT NULL ENABLE, "SERVICE_TYPE" CHAR(1) NOT NULL ENABLE, "SOC_LEVEL_CODE" CHAR(1) NOT NULL ENABLE, "FTR_EFFECTIVE_DATE" DATE NOT NULL ENABLE, "FTR_EFF_RSN_CODE" CHAR(1), "FTR_EXPIRATION_DATE" DATE, "FTR_EXP_RSN_CODE" CHAR(1), "RC_WAIVER_EFF_DATE" DATE, "RC_WAIVER_EXPR_DATE" DATE, "RC_WAIVER_RSN" CHAR(6), "ADDITIONAL_INFO_AMT" NUMBER(11,2), "ADDITIONAL_INFO_TYPE" CHAR(1), "ADDITIONAL_INFO" VARCHAR2(500), "CHARGE_LEVEL_CODE" CHAR(1), "BEN" NUMBER(5,0), "REVENUE_CODE" CHAR(3), "SOC_RELATED" CHAR(9), "BL_PROM_SORT_CODE" NUMBER(2,0), "SERVICE_CLASS" CHAR(3), "RATE_CODE" CHAR(9), "RC_EXPIRATION_DATE" DATE, "RC_ADVPYM_WAIVE_EFF_DT" DATE, "RC_ADVPYM_WAIVE_EXPR_DT" DATE, "RC_ADVPYM_WAIVE_RSN" CHAR(6), "RC_ADVPYM_BILL_SEQ_NO" NUMBER(3,0), "RENEWAL_RATE_CODE" CHAR(9), "SECONDARY_TN" CHAR(11), "CONV_RUN_NO" NUMBER(3,0), "PORT_IN_START_DATE" DATE, "PORT_IN_END_DATE" DATE ) PCTFREE 10 PCTUSED 40 INITRANS 7 MAXTRANS 255 NOCOMPRESS LOGGING STORAGE( BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" PARTITION BY RANGE ("BAN") (PARTITION "SERVICE_FEATURE_1" VALUES LESS THAN (18424929) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_2" VALUES LESS THAN (43933393) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_3" VALUES LESS THAN (65938220) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_4" VALUES LESS THAN (78030882) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_5" VALUES LESS THAN (89426107) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_6" VALUES LESS THAN (99364535) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_7" VALUES LESS THAN (105756681) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_8" VALUES LESS THAN (111776927) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_9" VALUES LESS THAN (117780453) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_10" VALUES LESS THAN (123780786) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_11" VALUES LESS THAN (129789421) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_12" VALUES LESS THAN (135790871) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_13" VALUES LESS THAN (141825258) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_14" VALUES LESS THAN (147827477) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_15" VALUES LESS THAN (153829228) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_16" VALUES LESS THAN (159835744) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_17" VALUES LESS THAN (165838495) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_18" VALUES LESS THAN (171850405) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_19" VALUES LESS THAN (177898201) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_20" VALUES LESS THAN (183970359) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_21" VALUES LESS THAN (190047208) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_22" VALUES LESS THAN (196075759) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_23" VALUES LESS THAN (202078088) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_24" VALUES LESS THAN (208151076) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_25" VALUES LESS THAN (345838310) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_26" VALUES LESS THAN (351838775) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_27" VALUES LESS THAN (357839253) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_28" VALUES LESS THAN (363851043) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_29" VALUES LESS THAN (369857876) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_30" VALUES LESS THAN (375861106) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_31" VALUES LESS THAN (381863539) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_32" VALUES LESS THAN (387914806) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_33" VALUES LESS THAN (393921028) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_34" VALUES LESS THAN (399926438) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_35" VALUES LESS THAN (405929686) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_36" VALUES LESS THAN (411936420) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_37" VALUES LESS THAN (417974400) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_38" VALUES LESS THAN (423985227) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_39" VALUES LESS THAN (429992484) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_40" VALUES LESS THAN (436183736) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_41" VALUES LESS THAN (442183736) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_42" VALUES LESS THAN (448183736) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_43" VALUES LESS THAN (454183736) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_44" VALUES LESS THAN (460183736) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_45" VALUES LESS THAN (466183736) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_46" VALUES LESS THAN (472183736) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_47" VALUES LESS THAN (478183736) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_48" VALUES LESS THAN (484183736) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_49" VALUES LESS THAN (490183736) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_50" VALUES LESS THAN (496183736) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_51" VALUES LESS THAN (502183736) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_52" VALUES LESS THAN (508183736) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_53" VALUES LESS THAN (514183736) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_54" VALUES LESS THAN (520183736) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_55" VALUES LESS THAN (526183736) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_56" VALUES LESS THAN (532183736) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_57" VALUES LESS THAN (538183736) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_58" VALUES LESS THAN (544183736) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_59" VALUES LESS THAN (550183736) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_60" VALUES LESS THAN (556183736) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_61" VALUES LESS THAN (562183736) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_62" VALUES LESS THAN (568183736) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_63" VALUES LESS THAN (574183736) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_64" VALUES LESS THAN (580183736) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_65" VALUES LESS THAN (586183736) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_66" VALUES LESS THAN (592183736) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_67" VALUES LESS THAN (598183736) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_68" VALUES LESS THAN (604183736) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_69" VALUES LESS THAN (610183736) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_70" VALUES LESS THAN (616183736) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_71" VALUES LESS THAN (622183736) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_72" VALUES LESS THAN (628183736) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_73" VALUES LESS THAN (634183736) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_74" VALUES LESS THAN (640183736) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_75" VALUES LESS THAN (646183736) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_76" VALUES LESS THAN (652183736) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_77" VALUES LESS THAN (658183736) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_78" VALUES LESS THAN (664183736) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_79" VALUES LESS THAN (670183736) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_80" VALUES LESS THAN (676183736) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_81" VALUES LESS THAN (682183736) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_82" VALUES LESS THAN (688183736) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_83" VALUES LESS THAN (694183736) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC NOLOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" , PARTITION "SERVICE_FEATURE_MAX" VALUES LESS THAN (MAXVALUE) NO INMEMORY SEGMENT CREATION IMMEDIATE PCTFREE 0 PCTUSED 40 INITRANS 7 MAXTRANS 255 COMPRESS BASIC LOGGING STORAGE(INITIAL 8388608 NEXT 1048576 MINEXTENTS 1 MAXEXTENTS 2147483645 PCTINCREASE 0 FREELISTS 1 FREELIST GROUPS 1 BUFFER_POOL KEEP FLASH_CACHE DEFAULT CELL_FLASH_CACHE DEFAULT) TABLESPACE "DATA" ) PARALLEL 8 ;
Будем загружать самую большую партицию SERVICE_FEATURE_MAX (14 млрд строк)
Загрузка с помощью Sqoop будет выглядеть следующим образом:
sqoop import \ --num-mappers 8 \ --fetch-size 100000 \ -Dmapreduce.task.timeout=0 \ -D mapreduce.map.memory.mb=16384 \ -Doraoop.import.partitions='"SERVICE_FEATURE_MAX"' \ --hcatalog-database customer360_stg \ --hcatalog-table customer_oracle_sqoop \ --connect jdbc:oracle:thin:@//10.31.178.66:1521/ORCL \ --username user \ --password password \ --table DWH.SERVICE_FEATURE \ --drop-and-create-hcatalog-table \ --hcatalog-storage-stanza 'stored as orcfile' \ --direct
Потребляемые ресурсы: 9 Cores, 144GB RAM
Время загрузки: ~ 26 часов (14 часов, если загружать в текстовом формате хранения)
Теперь попробуем загрузить эту же таблицу через Spark.
Будем сразу же грузить в 8 потоков, как в Sqoop.
Применив способ, который описан в документации по Spark, и взяв псевдостолбец ROWNUM за поле, по которому будем делить таблицу (можно использовать другое поле, по которому есть равномерное распределение и можно разделить на порции), мы получим вот такой код:
//Получаем максимальное значение rownum val max = spark. read. format("jdbc"). option("url", "jdbc:oracle:thin:@//10.31.178.66:1521/ORCL"). option("user", "user"). option("password", "password"). option("driver", "oracle.jdbc.driver.OracleDriver"). option("dbtable", s"( select max(rownum) as row_num FROM DWH.SERVICE_FEATURE PARTITION (SERVICE_FEATURE))"). load(). collect(). map(_.getDecimal(0)). head.toBigInteger //Загружаем данные val etlDF = spark. read. format("jdbc"). option("url", "jdbc:oracle:thin:@//10.31.178.66:1521/ORCL"). option("user", "user"). option("password", "password"). option("driver", "oracle.jdbc.driver.OracleDriver"). option("dbtable", "( select a.*, rownum as row_num FROM DWH.SERVICE_FEATURE PARTITION (SERVICE_FEATURE) a)" ). option("partitionColumn", "row_num"). option("upperBound", max.toString().toInt). option("lowerBound", 0). option("numPartitions", 8). option("fetchSize", "10000"). option("driver", "oracle.jdbc.driver.OracleDriver"). load() //сохраняем etlDF. write. mode(SaveMode.Overwrite). format("orc"). saveAsTable("customer360_stg.customer_oracle_spark")
Потребляемые ресурсы: 9 Cores, 132GB RAM
Время загрузки: Принудительно завершил загрузку после 35 часов
Почему так происходит?
После таких результатов в основном делают выводы, что сравнение загрузок через Sqoop и Spark завершено и предпочтение надо отдавать Sqoop.
Давайте попробуем разобраться, в чём же магия загрузки через Sqoop, ведь и Sqoop, и Spark используют JDBC-соединение (которое никак не кастомизировалось, кроме как fetchSize), с одинаковым количеством сессий. Но при этом Sqoop использует устаревшую технологию MapReduce, а Spark все операции проделывает в оперативной памяти, что должно сказаться на производительности при преобразовании данных в формат ORC.
Но это не так, так как в нашем случае при загрузке через Spark все 8 потоков делают fullscan партиции, отфильтруют нужные значения и отправят их на executor, а так как объёмы данных большие, то они не будут помещаться в память и будет происходить spill to disk и много еще других операций, которые будут только увеличивать время загрузки. Чтобы избавится от спила, необходимо увеличивать число параллелей, а это увеличивает число сессий и фулсканов таблицы.
И для того, чтобы начать эффективно загружать таблицу, необходимо разобраться, почему Sqoop не делает fullscan для каждой сессии, а знает, что ему прочитать с максимальной эффективностью, так, что каждая сессия знает, какой ей блок прочитать. О блока, экстентах, сегментах можно прочитать вот здесь.
Если запустить загрузку через Sqoop и посмотреть, какие запросы делает загрузчик к базе данных, то там можно обнаружить вот такой интересный запрос:
SELECT data_object_id, file_id, relative_fno, file_batch, MIN (start_block_id) start_block_id, MAX (end_block_id) end_block_id, SUM (blocks) blocks FROM (SELECT o.data_object_id, e.file_id, e.relative_fno, e.block_id start_block_id, e.block_id + e.blocks - 1 end_block_id, e.blocks, CEIL (SUM(e.blocks) OVER (PARTITION BY o.data_object_id, e.file_id ORDER BY e.block_id ASC) / (SUM (e.blocks)OVER (PARTITION BY o.data_object_id,e.file_id) / 21)) file_batch FROM dba_extents e, dba_objects o, dba_tab_subpartitions tsp WHERE o.owner = 'DWH' AND o.object_name = 'SERVICE_FRATURE' AND e.owner = 'DWH' AND e.segment_name = 'SERVICE_FRATURE' AND o.owner = e.owner AND o.object_name = e.segment_name AND (o.subobject_name = e.partition_name OR (o.subobject_name IS NULL AND e.partition_name IS NULL)) AND o.owner = tsp.table_owner(+) AND o.object_name = tsp.table_name(+) AND o.subobject_name = tsp.subpartition_name(+)) GROUP BY data_object_id, file_id, relative_fno, file_batch ORDER BY data_object_id, file_id, relative_fno, file_batch
Результат этого запроса будет выглядеть следующим образом:

Посмотрим, какую информацию выводит запрос, обратившись к документации Oracle:
№ | Column | Description | Comment |
1 | DATA_OBJECT_ID | Dictionary object number of the segment that contains the object |
|
2 | FILE_ID | File identifier number of the file containing the extent |
|
3 | RELATIVE_FNO | Relative file number of the first extent block |
|
4 | FILE_BATCH | Number оf the batch in file |
|
5 | START_BLOCK_ID | Starting block number of the extent |
|
6 | END_BLOCK_ID | Block number of the extent | START_BLOCK_ID + BLOCKS - 1 |
7 | BLOCKS | Size of the extent in Oracle blocks |
|
8 | SUBOBJECT_NAME | Name of the subobject (for example, partition) | Поле в выводе не участвует, но в дальнейшем оно нам понадобится |
На основании полученных данных Sqoop строит свои запросы для выгрузки, которые выглядят так:
SELECT /*+ NO_INDEX(t) */ CUSTOMER_ID,SYS_CREATION_DATE,SYS_UPDATE_DATE,OPERATOR_ID,APPLICATION_ID,DL_SERVICE_CODE,DL_UPDATE_STAMP,CONTACT_TELNO,CONTACT_TN_EXTNO,CONTACT_FAXNO,CONTACT_COUNTRY_CODE,WORK_TELNO,WORK_TN_EXTNO,HOME_TELNO,OTHER_TELNO,OTHER_EXTNO,OTHER_TN_TYPE,BIRTH_DATE,EMPLOYMENT_DT,EMPLYR_NAME,EMPLOYEE_QTY,YEARS_IN_BUSINESS,EMPLOYEE_POSITION,BUSINESS_EST_DATE,MARKET_ID,GUR_CR_CARD_TYPE,GUR_CR_CARD_NO,GUR_CR_CARD_EXP_DT,ACC_PASSWORD,VERIFIED_DATE,COMPANY_REGISTER_NUM,BUSINESS_CATEGORY,CONV_RUN_NO,LEGACY_CST_NUM,E_MAIL_ADDR,MAILSHOT,IMEI_BLACK_LIST_PASS,COMPANY_TYPE,CUST_GENDER,CUST_PERSONAL_ID,CUST_NATIONALITY,CUST_DOC_TYPE,CUST_DOC_ID,CUST_DOC_NO,CUST_DOC_DATE,CUST_DOC_ISSUE,EMP_NO,AUTPRSN_BIRTH_DATE,AUTPRSN_PERSONAL_ID,GUARANTOR_TYPE,GUR_PERSONAL_ID,GUR_COMPANY_REG_NUM,START_TIME_AT_BANK,GUR_BIRTH_DATE,GUR_TAX_NUMBER,REGISTER_IND,CONTACT_TELNO_2ND,CONTACT_TN_EXTNO_2ND,CONTACT_FAXNO_2ND,CONTACT_COUNTRY_CODE_2ND,CREDIT_CARD_IND,AUTHORIZED_SEC,LEGAL_NAME_FORM,'1024_4' data_chunk_id FROM DWH.CUSTOMER t WHERE ((rowid >= dbms_rowid.rowid_create(1, 510042, 1024, 127475712, 0) AND rowid <= dbms_rowid.rowid_create(1, 510042, 1024, 127696895, 32767))) UNION ALL SELECT /*+ NO_INDEX(t) */ CUSTOMER_ID,SYS_CREATION_DATE,SYS_UPDATE_DATE,OPERATOR_ID,APPLICATION_ID,DL_SERVICE_CODE,DL_UPDATE_STAMP,CONTACT_TELNO,CONTACT_TN_EXTNO,CONTACT_FAXNO,CONTACT_COUNTRY_CODE,WORK_TELNO,WORK_TN_EXTNO,HOME_TELNO,OTHER_TELNO,OTHER_EXTNO,OTHER_TN_TYPE,BIRTH_DATE,EMPLOYMENT_DT,EMPLYR_NAME,EMPLOYEE_QTY,YEARS_IN_BUSINESS,EMPLOYEE_POSITION,BUSINESS_EST_DATE,MARKET_ID,GUR_CR_CARD_TYPE,GUR_CR_CARD_NO,GUR_CR_CARD_EXP_DT,ACC_PASSWORD,VERIFIED_DATE,COMPANY_REGISTER_NUM,BUSINESS_CATEGORY,CONV_RUN_NO,LEGACY_CST_NUM,E_MAIL_ADDR,MAILSHOT,IMEI_BLACK_LIST_PASS,COMPANY_TYPE,CUST_GENDER,CUST_PERSONAL_ID,CUST_NATIONALITY,CUST_DOC_TYPE,CUST_DOC_ID,CUST_DOC_NO,CUST_DOC_DATE,CUST_DOC_ISSUE,EMP_NO,AUTPRSN_BIRTH_DATE,AUTPRSN_PERSONAL_ID,GUARANTOR_TYPE,GUR_PERSONAL_ID,GUR_COMPANY_REG_NUM,START_TIME_AT_BANK,GUR_BIRTH_DATE,GUR_TAX_NUMBER,REGISTER_IND,CONTACT_TELNO_2ND,CONTACT_TN_EXTNO_2ND,CONTACT_FAXNO_2ND,CONTACT_COUNTRY_CODE_2ND,CREDIT_CARD_IND,AUTHORIZED_SEC,LEGAL_NAME_FORM,'1024_12' data_chunk_id FROM DWH.CUSTOMER t WHERE ((rowid >= dbms_rowid.rowid_create(1, 510042, 1024, 129204224, 0) AND rowid <= dbms_rowid.rowid_create(1, 510042, 1024, 129417215, 32767)))
Таких запросов будет столько, сколько вами указано потоков загрузки. У этих запросов есть один минус – бывает, что количество FILE_BATCH в десятки раз превышает число потоков, тем самым увеличивая количество UNION для одной сессии, и так как этот запрос выполняется на стороне Oracle, то это сказывается на производительности базы.
Запрос ничем не отличается от обычного селекта, который мы делали в нашей первой реализации Spark выгрузки, кроме того, по какому полю он фильтрует данные - rowid.
Большие данные в билайн мы используем сразу в ряде наших продуктов для бизнеса — от сервисов по продвижению товаров и услуг до аналитики разного уровня.
Реализация быстрой загрузки с помощью Spark
Опираясь на полученные знания о том, как строит свои запросы Sqoop, и проблемы, с которыми мы можем столкнуться при его использовании, напишем свою реализацию этого подхода выгрузки.
Пререквезиты:
Для ТУЗ, из-под которой будем делать загрузку, должны быть следующие права (этого требует и Sqoop):
grant select on v_$instance to user; grant select on dba_tables to user; grant select on dba_tab_columns to user; grant select on dba_objects to user; grant select on dba_extents to user; grant select on dba_segments to user; grant select on dba_constraints to user; grant select on v_$database to user; grant select on v_$parameter to user; grant select on dba_tab_subpartitions to user;
Так как мы хотим написать переиспользуемое приложение, то параметризируем его:
//количество потоков val cntSession = 8 //на сколько порций разделим нашу загрузку - аналог FILE_BATCH у Sqoop val cntChunk = 512 //параметр JDBC подключения val jdbcUrl = "jdbc:oracle:thin:@//10.31.178.66:1521/ORCL" val fetchSize = 100000 val orclUsr = "user" val orclPwd = "password" //метаданные для источника val orclSchema = "DWH" val orclTable = "SERVICE_FEATURE" val orclPartition = "SERVICE_FEATURE_MAX" //метаданные для приёмника val hivePath = "/warehouse/tablespace/external/hive/customer360_stg.db/service_feature_orcl" val hiveTable = "customer360_stg.service_feature_orcl" val trgNumFiles = 400
Реализуем получение метаинформации для наших чанков, добавив в запрос Sqoop дополнительную информацию о имени партиции, чтобы можно было выгружать отдельные партиции.
val etlDFChunkAll = spark. read. format("jdbc"). option("url", jdbcUrl). option("user", orclUsr). option("password", orclPwd). option("driver", "oracle.jdbc.driver.OracleDriver"). option("dbtable", s"""( SELECT data_object_id, file_id, relative_fno, file_batch, subobject_name, MIN (start_block_id) start_block_id, MAX (end_block_id) end_block_id, SUM (blocks) blocks FROM (SELECT o.data_object_id, o.subobject_name, e.file_id, e.relative_fno, e.block_id start_block_id, e.block_id + e.blocks - 1 end_block_id, e.blocks, CEIL (SUM(e.blocks) OVER (PARTITION BY o.data_object_id, e.file_id ORDER BY e.block_id ASC) / (SUM (e.blocks)OVER (PARTITION BY o.data_object_id,e.file_id) / $cntChunk)) file_batch FROM dba_extents e, dba_objects o, dba_tab_subpartitions tsp WHERE o.owner = '$orclSchema' AND o.object_name = '$orclTable' AND e.owner = '$orclSchema' AND e.segment_name = '$orclTable' AND o.owner = e.owner AND o.object_name = e.segment_name AND (o.subobject_name = e.partition_name OR (o.subobject_name IS NULL AND e.partition_name IS NULL)) AND o.owner = tsp.table_owner(+) AND o.object_name = tsp.table_name(+) AND o.subobject_name = tsp.subpartition_name(+)) GROUP BY data_object_id, file_id, relative_fno, file_batch, subobject_name ORDER BY data_object_id, file_id, relative_fno, file_batch, subobject_name)"""). load()
Определяем, если надо выгрузить только партицию.
val etlDFChunk = if (orclPartition == "") etlDFChunkAll else etlDFChunkAll. filter($"subobject_name" === orclPartition)
Получаем список полей (админы Oracle запрещают выгружать по *)
val lstColumn = spark. read. format("jdbc"). option("url", jdbcUrl). option("user", orclUsr). option("password", orclPwd). option("driver", "oracle.jdbc.driver.OracleDriver"). option("dbtable", s"( SELECT * FROM $orclSchema.$orclTable WHERE 1=0 )"). load(). columns. toList
Реализуем функцию, которая генерирует запрос, и получим список сгруппированных запросов.
Каждая группа содержит столько запросов, сколько у нас должно быть потоков (8), чтобы каждая группа выполнялась в параллель.
def mkQuery(lstColumn: List[String]) = udf((data_object_id: Integer, relative_fno: Integer, start_block_id: Integer, end_block_id: Integer) => s"""SELECT /*+ NO_INDEX(t) */ ${lstColumn.mkString(", ")} FROM $orclSchema.$orclTable WHERE ((rowid >= dbms_rowid.rowid_create(1, $data_object_id, $relative_fno, $start_block_id, 0) AND rowid <= dbms_rowid.rowid_create(1, $data_object_id, $relative_fno, $end_block_id, 32767)))""") val lstQueries = etlDFChunk. withColumn("query", mkQuery(lstColumn)($"data_object_id" cast (IntegerType), $"relative_fno" cast (IntegerType), $"start_block_id" cast (IntegerType), $"end_block_id" cast (IntegerType))). select($"relative_fno", $"file_batch" cast (IntegerType), $"query"). collect(). map(row => s"""${row.getInt(0)}_${row.getInt(1)}""" -> row.getString(2)).toMap.grouped(cntSession). toList
Выполняем запросы в параллель, через параллельные коллекции Scala, записывая результаты во временную директорию.
lstQueries.foreach{ lst => val lstPar = lst.par lstPar.tasksupport = new ForkJoinTaskSupport(new ForkJoinPool(cntSession)) lstPar.foreach{query => spark. read. format("jdbc"). option("url", jdbcUrl). option("user", orclUsr). option("password", orclPwd). option("driver", "oracle.jdbc.driver.OracleDriver"). option("dbtable", s"( ${query._2} )"). option("fetchSize", fetchSize). load(). write. mode(SaveMode.Overwrite). orc(s"${hivePath}_tmp/part_${query._1}") } }
Читаем полученный результат, объединяем, записываем в целевую таблицу, очищаем временную директорию.
val mapQuery = lstQueries.flatten.toMap val trgDf = mapQuery.map { query => spark. read. orc(s"${hivePath}_tmp/part_${query._1}") }. reduce(_ union (_)) trgDf. coalesce(trgNumFiles). write. mode(SaveMode.Overwrite). option("path", hivePath). format("orc"). saveAsTable(hiveTable) val confFs = spark.sparkContext.hadoopConfiguration val fs = FileSystem.get(confFs) val path = new Path(s"${hivePath}_tmp") if (fs.exists(path)) fs.delete(path, true) }
Результат работы:
Потребляемые ресурсы: 9 Cores, 132GB RAM
Время загрузки: 12 часов
Итоги
Как мы видим, всё-таки Spark можно, даже нужно использовать для загрузки данных, только стоит оптимально написать приложение и тогда результат будет на уровне Sqoop или лучше.
В этой статье рассмотрен один из вариантов того, как грузит Sqoop, возможно, есть и другие методы загрузки, которые он использует у себя под капотом. Но, используя такую методологию, всю эту магию можно реализовать в Spark. Возможно, вам это и не придётся делать и будет достаточно переиспользования кода этой статьи.
P.S. Код можно завернуть в jar, вынеся настройку через параметры, и использовать его в своих загрузках.
